#Load Library
library(readr)
library(tibble)
library(rsample)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(rpart)
library(ggplot2)
library(knitr)
library(purrr)
library(glmnet)
## Loading required package: Matrix
## Loaded glmnet 4.1-3
#Write Function
#' Most frequent value (statistical mode) of a vector.
#'
#' Ties are resolved in favour of the value that appears first in `x`.
#'
#' @param x A vector (numeric, character, logical or factor).
#' @param na.rm If `TRUE` (default), missing values are ignored so that `NA`
#'   can never win the count; this is what an imputation value needs. Pass
#'   `FALSE` to count `NA` as an ordinary value.
#' @return A length-one vector of the same type as `x`. It is `NA` when `x`
#'   has no values to count (empty, or all `NA` with `na.rm = TRUE`).
calc_mode <- function(x, na.rm = TRUE) {
  if (na.rm) {
    x <- x[!is.na(x)]
  }

  # List the distinct / unique values
  distinct_values <- unique(x)

  # Count the occurrence of each distinct value
  distinct_tabulate <- tabulate(match(x, distinct_values))

  # Return the value with the highest occurrence (first one on ties)
  distinct_values[which.max(distinct_tabulate)]
}
# Load data
# Seed the RNG first so the random train/test split further down is
# reproducible from run to run.
set.seed(42)
# Historic sales (used for modelling) and the properties to be predicted.
df <- read.csv(file = "/Users/tsaichichien/data/historic_property_data.csv")
val <- read.csv(file = "/Users/tsaichichien/data/predict_property_data.csv")
# Remove some variables
# The dropped columns are grouped by prefix for readability; a negative
# selection keeps the remaining columns in their original order.
df_new <- df %>%
  select(-c(
    # meta_*
    meta_cdu, meta_town_code, meta_certified_est_land, meta_class,
    meta_certified_est_bldg, meta_deed_type, meta_nbhd,
    # char_*
    char_tp_dsgn, char_attic_fnsh, char_renovation, char_porch, char_apts,
    char_ot_impr, char_use, char_cnst_qlty, char_site,
    # geo_*
    geo_property_zip, geo_black_perc, geo_other_perc, geo_property_city,
    geo_white_perc, geo_his_perc, geo_municipality, geo_tract_pop,
    geo_asian_perc, geo_fips, geo_school_hs_district,
    geo_school_elem_district,
    # ind_*
    ind_arms_length, ind_large_home
  ))
# Check current column names
colnames(df_new)
##  [1] "sale_price"                  "char_hd_sf"                 
##  [3] "char_age"                    "char_ext_wall"              
##  [5] "char_roof_cnst"              "char_rooms"                 
##  [7] "char_beds"                   "char_bsmt"                  
##  [9] "char_bsmt_fin"               "char_heat"                  
## [11] "char_oheat"                  "char_air"                   
## [13] "char_frpl"                   "char_attic_type"            
## [15] "char_fbath"                  "char_hbath"                 
## [17] "char_tp_plan"                "char_gar1_size"             
## [19] "char_gar1_cnst"              "char_gar1_att"              
## [21] "char_gar1_area"              "char_bldg_sf"               
## [23] "char_repair_cnd"             "char_type_resd"             
## [25] "geo_ohare_noise"             "geo_floodplain"             
## [27] "geo_fs_flood_factor"         "geo_fs_flood_risk_direction"
## [29] "geo_withinmr100"             "geo_withinmr101300"         
## [31] "econ_tax_rate"               "econ_midincome"             
## [33] "ind_garage"
# Remove outliers
# Outliers are the points boxplot() reports beyond its whiskers. Both sets
# are computed on the unfiltered data, before any row is dropped.
outliers1 <- boxplot(df_new$sale_price, plot = FALSE)$out
outliers2 <- boxplot(df_new$char_bldg_sf, plot = FALSE)$out

# Filter with a logical mask rather than x[-which(...), ]: when nothing
# matches, which() returns integer(0) and x[-integer(0), ] silently drops
# EVERY row instead of none.
x <- df_new
x <- x[!(x$sale_price %in% outliers1), ]
x <- x[!(x$char_bldg_sf %in% outliers2), ]

df_new1 <- x
# Histogram of sale price after outlier removal
hist(df_new1$sale_price)

#mean(df_new1$sale_price)
# Count missing values for each variable (one named count per column)
colSums(is.na(df_new1))
##                  sale_price                  char_hd_sf 
##                           0                           0 
##                    char_age               char_ext_wall 
##                           0                          31 
##              char_roof_cnst                  char_rooms 
##                          31                           0 
##                   char_beds                   char_bsmt 
##                           0                          30 
##               char_bsmt_fin                   char_heat 
##                          29                          31 
##                  char_oheat                    char_air 
##                         136                          27 
##                   char_frpl             char_attic_type 
##                          29                          30 
##                  char_fbath                  char_hbath 
##                           0                           0 
##                char_tp_plan              char_gar1_size 
##                       14040                          24 
##              char_gar1_cnst               char_gar1_att 
##                        6232                        6230 
##              char_gar1_area                char_bldg_sf 
##                        6230                           0 
##             char_repair_cnd              char_type_resd 
##                          31                          27 
##             geo_ohare_noise              geo_floodplain 
##                          93                          93 
##         geo_fs_flood_factor geo_fs_flood_risk_direction 
##                          93                          93 
##             geo_withinmr100          geo_withinmr101300 
##                          93                          93 
##               econ_tax_rate              econ_midincome 
##                           0                          93 
##                  ind_garage 
##                          24
# Replace missing values
# Three imputation rules, each applied to a group of columns:
#   * coded / categorical columns -> most frequent observed value
#   * numeric columns             -> median of the observed values
#   * 0/1 indicator columns       -> 0
y <- df_new1

impute_mode_cols <- c(
  "char_ext_wall", "char_roof_cnst", "char_bsmt", "char_bsmt_fin",
  "char_heat", "char_oheat", "char_air", "char_attic_type", "char_tp_plan",
  "char_gar1_size", "char_gar1_cnst", "char_gar1_att", "char_gar1_area",
  "char_repair_cnd", "char_type_resd"
)
impute_median_cols <- c(
  "char_frpl", "econ_midincome", "geo_fs_flood_factor",
  "geo_fs_flood_risk_direction"
)
impute_zero_cols <- c(
  "geo_floodplain", "geo_ohare_noise", "geo_withinmr100",
  "geo_withinmr101300", "ind_garage"
)

# The mode is taken over the observed values only. If NA were counted too,
# a mostly-missing column would get NA as its "mode" and stay unimputed.
y[impute_mode_cols] <- lapply(y[impute_mode_cols], function(values) {
  values[is.na(values)] <- calc_mode(values[!is.na(values)])
  values
})

y[impute_median_cols] <- lapply(y[impute_median_cols], function(values) {
  values[is.na(values)] <- median(values, na.rm = TRUE)
  values
})

y[impute_zero_cols] <- lapply(y[impute_zero_cols], function(values) {
  values[is.na(values)] <- 0
  values
})

Linear Regression

# Split data into train and test
# 70% of the imputed rows go to training, the remaining 30% to testing.
df_trn_tst_splity <- initial_split(data = y, prop = 0.70)
df_trn_y <- training(df_trn_tst_splity)
df_tst_y <- testing(df_trn_tst_splity)

# Build model
# Full linear model fitted on the training rows. Coded columns are wrapped
# in as.factor() so each code gets its own coefficient. The order of the
# terms is kept as-is because it fixes the order of the coefficient table.
lm_full_y <- lm(
  formula = sale_price ~
    char_rooms + char_frpl + char_bldg_sf + econ_tax_rate + char_age +
    char_fbath + econ_midincome +
    as.factor(geo_floodplain) + as.factor(geo_withinmr100) +
    as.factor(geo_ohare_noise) + as.factor(char_heat) +
    as.factor(char_gar1_att) + as.factor(char_bsmt) +
    as.factor(char_attic_type) + as.factor(char_tp_plan) +
    char_beds + char_hbath + char_hd_sf +
    geo_fs_flood_factor + geo_fs_flood_risk_direction +
    as.factor(geo_withinmr101300) + as.factor(ind_garage) +
    as.factor(char_ext_wall) + as.factor(char_type_resd) +
    as.factor(char_roof_cnst) + as.factor(char_oheat) +
    as.factor(char_gar1_size) + as.factor(char_gar1_area) +
    as.factor(char_repair_cnd) + as.factor(char_bsmt_fin) +
    as.factor(char_air),
  data = df_trn_y
)


# Find mse
# Training MSE: mean squared residual of the fitted model.
mean(residuals(lm_full_y)^2)
## [1] 9378903988
# Test MSE: mean squared prediction error on the held-out rows.
mse1 <- mean((df_tst_y$sale_price - predict(lm_full_y, newdata = df_tst_y))^2)
summary(lm_full_y)
## 
## Call:
## lm(formula = sale_price ~ char_rooms + char_frpl + char_bldg_sf + 
##     econ_tax_rate + char_age + char_fbath + econ_midincome + 
##     as.factor(geo_floodplain) + as.factor(geo_withinmr100) + 
##     as.factor(geo_ohare_noise) + as.factor(char_heat) + as.factor(char_gar1_att) + 
##     as.factor(char_bsmt) + as.factor(char_attic_type) + as.factor(char_tp_plan) + 
##     char_beds + char_hbath + char_hd_sf + geo_fs_flood_factor + 
##     geo_fs_flood_risk_direction + as.factor(geo_withinmr101300) + 
##     as.factor(ind_garage) + as.factor(char_ext_wall) + as.factor(char_type_resd) + 
##     as.factor(char_roof_cnst) + as.factor(char_oheat) + as.factor(char_gar1_size) + 
##     as.factor(char_gar1_area) + as.factor(char_repair_cnd) + 
##     as.factor(char_bsmt_fin) + as.factor(char_air), data = df_trn_y)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -436981  -63571   -5809   53280  507281 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     5.554e+04  2.490e+04   2.231  0.02569 *  
## char_rooms                     -1.557e+03  5.628e+02  -2.766  0.00567 ** 
## char_frpl                       9.968e+03  1.320e+03   7.551 4.43e-14 ***
## char_bldg_sf                    7.238e+01  1.863e+00  38.864  < 2e-16 ***
## econ_tax_rate                  -6.878e+03  1.253e+02 -54.905  < 2e-16 ***
## char_age                        2.307e+02  2.867e+01   8.047 8.75e-16 ***
## char_fbath                      1.268e+04  1.320e+03   9.609  < 2e-16 ***
## econ_midincome                  2.418e+00  2.068e-02 116.965  < 2e-16 ***
## as.factor(geo_floodplain)1     -2.213e+04  5.251e+03  -4.215 2.51e-05 ***
## as.factor(geo_withinmr100)1    -3.186e+04  6.682e+03  -4.768 1.87e-06 ***
## as.factor(geo_ohare_noise)1     5.107e+04  4.562e+03  11.196  < 2e-16 ***
## as.factor(char_heat)2           2.696e+03  1.783e+03   1.512  0.13046    
## as.factor(char_heat)3          -1.649e+04  8.510e+03  -1.938  0.05260 .  
## as.factor(char_heat)4          -1.420e+04  1.212e+04  -1.171  0.24157    
## as.factor(char_gar1_att)2       4.505e+03  1.779e+03   2.533  0.01133 *  
## as.factor(char_bsmt)2          -1.703e+04  1.918e+03  -8.875  < 2e-16 ***
## as.factor(char_bsmt)3          -1.189e+04  1.798e+03  -6.614 3.80e-11 ***
## as.factor(char_bsmt)4          -2.415e+04  2.468e+03  -9.785  < 2e-16 ***
## as.factor(char_attic_type)2     2.161e+03  2.033e+03   1.063  0.28787    
## as.factor(char_attic_type)3     8.123e+02  1.784e+03   0.455  0.64880    
## as.factor(char_tp_plan)2        3.907e+03  4.037e+03   0.968  0.33320    
## char_beds                      -4.443e+03  1.055e+03  -4.210 2.56e-05 ***
## char_hbath                      1.207e+04  1.236e+03   9.768  < 2e-16 ***
## char_hd_sf                      2.669e-01  6.545e-02   4.078 4.55e-05 ***
## geo_fs_flood_factor             3.711e+03  4.535e+02   8.184 2.84e-16 ***
## geo_fs_flood_risk_direction    -2.020e+03  2.093e+03  -0.965  0.33443    
## as.factor(geo_withinmr101300)1 -4.982e+03  2.303e+03  -2.163  0.03051 *  
## as.factor(ind_garage)1         -3.463e+04  2.311e+04  -1.499  0.13396    
## as.factor(char_ext_wall)2       2.916e+03  1.443e+03   2.021  0.04334 *  
## as.factor(char_ext_wall)3       2.730e+03  1.586e+03   1.722  0.08517 .  
## as.factor(char_ext_wall)4       2.933e+04  4.710e+03   6.227 4.82e-10 ***
## as.factor(char_type_resd)2     -4.305e+03  1.757e+03  -2.450  0.01428 *  
## as.factor(char_type_resd)3      8.057e+04  5.143e+03  15.668  < 2e-16 ***
## as.factor(char_type_resd)4      6.682e+03  2.651e+03   2.520  0.01173 *  
## as.factor(char_type_resd)5     -7.806e+02  2.214e+03  -0.353  0.72445    
## as.factor(char_type_resd)6      3.993e+04  3.249e+04   1.229  0.21899    
## as.factor(char_type_resd)7      6.942e+04  2.695e+04   2.576  0.01000 *  
## as.factor(char_type_resd)8      4.399e+04  3.671e+04   1.199  0.23071    
## as.factor(char_type_resd)9      4.933e+04  9.698e+04   0.509  0.61097    
## as.factor(char_roof_cnst)2     -7.375e+02  2.621e+03  -0.281  0.77839    
## as.factor(char_roof_cnst)3     -1.218e+04  1.207e+04  -1.009  0.31278    
## as.factor(char_roof_cnst)4      1.541e+04  8.865e+03   1.738  0.08222 .  
## as.factor(char_roof_cnst)5     -9.701e+03  8.029e+03  -1.208  0.22695    
## as.factor(char_roof_cnst)6      3.015e+04  9.176e+03   3.286  0.00102 ** 
## as.factor(char_oheat)5          1.213e+04  2.994e+03   4.052 5.09e-05 ***
## as.factor(char_gar1_size)2      2.149e+03  2.602e+03   0.826  0.40891    
## as.factor(char_gar1_size)3      9.934e+03  1.640e+03   6.057 1.40e-09 ***
## as.factor(char_gar1_size)4      3.305e+03  3.124e+03   1.058  0.29007    
## as.factor(char_gar1_size)5      2.309e+04  4.506e+03   5.124 3.00e-07 ***
## as.factor(char_gar1_size)6      4.034e+04  1.629e+04   2.476  0.01330 *  
## as.factor(char_gar1_size)7     -4.907e+04  2.322e+04  -2.113  0.03457 *  
## as.factor(char_gar1_size)8     -6.704e+03  1.726e+04  -0.388  0.69773    
## as.factor(char_gar1_area)2     -7.968e+02  2.542e+03  -0.313  0.75391    
## as.factor(char_repair_cnd)2    -2.716e+04  6.653e+03  -4.082 4.47e-05 ***
## as.factor(char_repair_cnd)3    -1.853e+04  1.035e+04  -1.790  0.07350 .  
## as.factor(char_bsmt_fin)2      -1.974e+04  5.271e+03  -3.746  0.00018 ***
## as.factor(char_bsmt_fin)3      -1.208e+04  1.486e+03  -8.134 4.32e-16 ***
## as.factor(char_air)2           -2.135e+04  1.367e+03 -15.619  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 96930 on 31431 degrees of freedom
## Multiple R-squared:  0.5769, Adjusted R-squared:  0.5761 
## F-statistic: 751.8 on 57 and 31431 DF,  p-value: < 2.2e-16

Backward Elimination

# Backward elimination from the full model: at each step drop the term whose
# removal lowers AIC the most, and stop when no removal improves it.
# trace = 1 and k = 2 are step()'s defaults, written out for clarity.
lm.step.backward <- step(
  object = lm_full_y,
  direction = "backward",
  trace = 1,
  k = 2
)
## Start:  AIC=723157.9
## sale_price ~ char_rooms + char_frpl + char_bldg_sf + econ_tax_rate + 
##     char_age + char_fbath + econ_midincome + as.factor(geo_floodplain) + 
##     as.factor(geo_withinmr100) + as.factor(geo_ohare_noise) + 
##     as.factor(char_heat) + as.factor(char_gar1_att) + as.factor(char_bsmt) + 
##     as.factor(char_attic_type) + as.factor(char_tp_plan) + char_beds + 
##     char_hbath + char_hd_sf + geo_fs_flood_factor + geo_fs_flood_risk_direction + 
##     as.factor(geo_withinmr101300) + as.factor(ind_garage) + as.factor(char_ext_wall) + 
##     as.factor(char_type_resd) + as.factor(char_roof_cnst) + as.factor(char_oheat) + 
##     as.factor(char_gar1_size) + as.factor(char_gar1_area) + as.factor(char_repair_cnd) + 
##     as.factor(char_bsmt_fin) + as.factor(char_air)
## 
##                                 Df  Sum of Sq        RSS    AIC
## - as.factor(char_attic_type)     2 1.1096e+10 2.9534e+14 723155
## - as.factor(char_gar1_area)      1 9.2341e+08 2.9533e+14 723156
## - geo_fs_flood_risk_direction    1 8.7545e+09 2.9534e+14 723157
## - as.factor(char_tp_plan)        1 8.7990e+09 2.9534e+14 723157
## <none>                                        2.9533e+14 723158
## - as.factor(ind_garage)          1 2.1105e+10 2.9535e+14 723158
## - as.factor(char_heat)           3 7.3081e+10 2.9541e+14 723160
## - as.factor(geo_withinmr101300)  1 4.3979e+10 2.9538e+14 723161
## - as.factor(char_gar1_att)       1 6.0268e+10 2.9539e+14 723162
## - char_rooms                     1 7.1911e+10 2.9540e+14 723164
## - as.factor(char_roof_cnst)      5 1.5507e+11 2.9549e+14 723164
## - as.factor(char_repair_cnd)     2 1.6704e+11 2.9550e+14 723172
## - as.factor(char_oheat)          1 1.5429e+11 2.9549e+14 723172
## - char_hd_sf                     1 1.5629e+11 2.9549e+14 723173
## - char_beds                      1 1.6655e+11 2.9550e+14 723174
## - as.factor(geo_floodplain)      1 1.6692e+11 2.9550e+14 723174
## - as.factor(geo_withinmr100)     1 2.1360e+11 2.9555e+14 723179
## - as.factor(char_ext_wall)       3 3.7769e+11 2.9571e+14 723192
## - as.factor(char_gar1_size)      7 6.3707e+11 2.9597e+14 723212
## - char_frpl                      1 5.3576e+11 2.9587e+14 723213
## - char_age                       1 6.0852e+11 2.9594e+14 723221
## - geo_fs_flood_factor            1 6.2937e+11 2.9596e+14 723223
## - as.factor(char_bsmt_fin)       2 6.6498e+11 2.9600e+14 723225
## - char_fbath                     1 8.6757e+11 2.9620e+14 723248
## - char_hbath                     1 8.9660e+11 2.9623e+14 723251
## - as.factor(geo_ohare_noise)     1 1.1778e+12 2.9651e+14 723281
## - as.factor(char_bsmt)           3 1.4972e+12 2.9683e+14 723311
## - as.factor(char_air)            1 2.2923e+12 2.9762e+14 723399
## - as.factor(char_type_resd)      8 3.1398e+12 2.9847e+14 723475
## - char_bldg_sf                   1 1.4192e+13 3.0952e+14 724634
## - econ_tax_rate                  1 2.8326e+13 3.2366e+14 726040
## - econ_midincome                 1 1.2855e+14 4.2388e+14 734534
## 
## Step:  AIC=723155.1
## sale_price ~ char_rooms + char_frpl + char_bldg_sf + econ_tax_rate + 
##     char_age + char_fbath + econ_midincome + as.factor(geo_floodplain) + 
##     as.factor(geo_withinmr100) + as.factor(geo_ohare_noise) + 
##     as.factor(char_heat) + as.factor(char_gar1_att) + as.factor(char_bsmt) + 
##     as.factor(char_tp_plan) + char_beds + char_hbath + char_hd_sf + 
##     geo_fs_flood_factor + geo_fs_flood_risk_direction + as.factor(geo_withinmr101300) + 
##     as.factor(ind_garage) + as.factor(char_ext_wall) + as.factor(char_type_resd) + 
##     as.factor(char_roof_cnst) + as.factor(char_oheat) + as.factor(char_gar1_size) + 
##     as.factor(char_gar1_area) + as.factor(char_repair_cnd) + 
##     as.factor(char_bsmt_fin) + as.factor(char_air)
## 
##                                 Df  Sum of Sq        RSS    AIC
## - as.factor(char_gar1_area)      1 8.8360e+08 2.9534e+14 723153
## - geo_fs_flood_risk_direction    1 8.6854e+09 2.9535e+14 723154
## - as.factor(char_tp_plan)        1 8.7293e+09 2.9535e+14 723154
## <none>                                        2.9534e+14 723155
## - as.factor(ind_garage)          1 2.1154e+10 2.9536e+14 723155
## - as.factor(char_heat)           3 7.3142e+10 2.9542e+14 723157
## - as.factor(geo_withinmr101300)  1 4.3714e+10 2.9539e+14 723158
## - as.factor(char_gar1_att)       1 6.0715e+10 2.9540e+14 723160
## - char_rooms                     1 7.1397e+10 2.9541e+14 723161
## - as.factor(char_roof_cnst)      5 1.5375e+11 2.9550e+14 723161
## - as.factor(char_repair_cnd)     2 1.6679e+11 2.9551e+14 723169
## - as.factor(char_oheat)          1 1.5583e+11 2.9550e+14 723170
## - char_hd_sf                     1 1.5827e+11 2.9550e+14 723170
## - char_beds                      1 1.6630e+11 2.9551e+14 723171
## - as.factor(geo_floodplain)      1 1.6710e+11 2.9551e+14 723171
## - as.factor(geo_withinmr100)     1 2.1446e+11 2.9556e+14 723176
## - as.factor(char_ext_wall)       3 3.8085e+11 2.9572e+14 723190
## - as.factor(char_gar1_size)      7 6.3466e+11 2.9598e+14 723209
## - char_frpl                      1 5.3916e+11 2.9588e+14 723210
## - geo_fs_flood_factor            1 6.2751e+11 2.9597e+14 723220
## - char_age                       1 6.3696e+11 2.9598e+14 723221
## - as.factor(char_bsmt_fin)       2 6.6923e+11 2.9601e+14 723222
## - char_fbath                     1 8.6254e+11 2.9621e+14 723245
## - char_hbath                     1 8.9736e+11 2.9624e+14 723249
## - as.factor(geo_ohare_noise)     1 1.1808e+12 2.9652e+14 723279
## - as.factor(char_bsmt)           3 1.4957e+12 2.9684e+14 723308
## - as.factor(char_air)            1 2.2964e+12 2.9764e+14 723397
## - as.factor(char_type_resd)      8 3.1538e+12 2.9850e+14 723474
## - char_bldg_sf                   1 1.4197e+13 3.0954e+14 724632
## - econ_tax_rate                  1 2.8369e+13 3.2371e+14 726041
## - econ_midincome                 1 1.2899e+14 4.2433e+14 734564
## 
## Step:  AIC=723153.2
## sale_price ~ char_rooms + char_frpl + char_bldg_sf + econ_tax_rate + 
##     char_age + char_fbath + econ_midincome + as.factor(geo_floodplain) + 
##     as.factor(geo_withinmr100) + as.factor(geo_ohare_noise) + 
##     as.factor(char_heat) + as.factor(char_gar1_att) + as.factor(char_bsmt) + 
##     as.factor(char_tp_plan) + char_beds + char_hbath + char_hd_sf + 
##     geo_fs_flood_factor + geo_fs_flood_risk_direction + as.factor(geo_withinmr101300) + 
##     as.factor(ind_garage) + as.factor(char_ext_wall) + as.factor(char_type_resd) + 
##     as.factor(char_roof_cnst) + as.factor(char_oheat) + as.factor(char_gar1_size) + 
##     as.factor(char_repair_cnd) + as.factor(char_bsmt_fin) + as.factor(char_air)
## 
##                                 Df  Sum of Sq        RSS    AIC
## - as.factor(char_tp_plan)        1 8.6392e+09 2.9535e+14 723152
## - geo_fs_flood_risk_direction    1 8.6744e+09 2.9535e+14 723152
## <none>                                        2.9534e+14 723153
## - as.factor(ind_garage)          1 2.1137e+10 2.9537e+14 723153
## - as.factor(char_heat)           3 7.3461e+10 2.9542e+14 723155
## - as.factor(geo_withinmr101300)  1 4.3665e+10 2.9539e+14 723156
## - as.factor(char_gar1_att)       1 6.0992e+10 2.9541e+14 723158
## - char_rooms                     1 7.1681e+10 2.9542e+14 723159
## - as.factor(char_roof_cnst)      5 1.5376e+11 2.9550e+14 723160
## - as.factor(char_repair_cnd)     2 1.6699e+11 2.9551e+14 723167
## - as.factor(char_oheat)          1 1.5571e+11 2.9550e+14 723168
## - char_hd_sf                     1 1.5747e+11 2.9550e+14 723168
## - as.factor(geo_floodplain)      1 1.6708e+11 2.9551e+14 723169
## - char_beds                      1 1.7084e+11 2.9552e+14 723169
## - as.factor(geo_withinmr100)     1 2.1461e+11 2.9556e+14 723174
## - as.factor(char_ext_wall)       3 3.8074e+11 2.9573e+14 723188
## - as.factor(char_gar1_size)      7 6.3926e+11 2.9598e+14 723207
## - char_frpl                      1 5.3960e+11 2.9588e+14 723209
## - geo_fs_flood_factor            1 6.2772e+11 2.9597e+14 723218
## - char_age                       1 6.4341e+11 2.9599e+14 723220
## - as.factor(char_bsmt_fin)       2 6.6868e+11 2.9601e+14 723220
## - char_fbath                     1 8.6201e+11 2.9621e+14 723243
## - char_hbath                     1 8.9802e+11 2.9624e+14 723247
## - as.factor(geo_ohare_noise)     1 1.1802e+12 2.9652e+14 723277
## - as.factor(char_bsmt)           3 1.4981e+12 2.9684e+14 723306
## - as.factor(char_air)            1 2.2970e+12 2.9764e+14 723395
## - as.factor(char_type_resd)      8 3.2538e+12 2.9860e+14 723482
## - char_bldg_sf                   1 1.4272e+13 3.0962e+14 724637
## - econ_tax_rate                  1 2.8369e+13 3.2371e+14 726039
## - econ_midincome                 1 1.2911e+14 4.2445e+14 734571
## 
## Step:  AIC=723152.1
## sale_price ~ char_rooms + char_frpl + char_bldg_sf + econ_tax_rate + 
##     char_age + char_fbath + econ_midincome + as.factor(geo_floodplain) + 
##     as.factor(geo_withinmr100) + as.factor(geo_ohare_noise) + 
##     as.factor(char_heat) + as.factor(char_gar1_att) + as.factor(char_bsmt) + 
##     char_beds + char_hbath + char_hd_sf + geo_fs_flood_factor + 
##     geo_fs_flood_risk_direction + as.factor(geo_withinmr101300) + 
##     as.factor(ind_garage) + as.factor(char_ext_wall) + as.factor(char_type_resd) + 
##     as.factor(char_roof_cnst) + as.factor(char_oheat) + as.factor(char_gar1_size) + 
##     as.factor(char_repair_cnd) + as.factor(char_bsmt_fin) + as.factor(char_air)
## 
##                                 Df  Sum of Sq        RSS    AIC
## - geo_fs_flood_risk_direction    1 8.5507e+09 2.9536e+14 723151
## <none>                                        2.9535e+14 723152
## - as.factor(ind_garage)          1 2.1226e+10 2.9537e+14 723152
## - as.factor(char_heat)           3 7.3030e+10 2.9543e+14 723154
## - as.factor(geo_withinmr101300)  1 4.3814e+10 2.9540e+14 723155
## - as.factor(char_gar1_att)       1 6.1318e+10 2.9541e+14 723157
## - char_rooms                     1 7.1545e+10 2.9542e+14 723158
## - as.factor(char_roof_cnst)      5 1.5145e+11 2.9550e+14 723158
## - as.factor(char_repair_cnd)     2 1.6236e+11 2.9552e+14 723165
## - as.factor(char_oheat)          1 1.5532e+11 2.9551e+14 723167
## - char_hd_sf                     1 1.5698e+11 2.9551e+14 723167
## - as.factor(geo_floodplain)      1 1.6630e+11 2.9552e+14 723168
## - char_beds                      1 1.7002e+11 2.9552e+14 723168
## - as.factor(geo_withinmr100)     1 2.1401e+11 2.9557e+14 723173
## - as.factor(char_ext_wall)       3 3.7966e+11 2.9573e+14 723187
## - as.factor(char_gar1_size)      7 6.4187e+11 2.9599e+14 723206
## - char_frpl                      1 5.3832e+11 2.9589e+14 723207
## - geo_fs_flood_factor            1 6.2652e+11 2.9598e+14 723217
## - char_age                       1 6.3987e+11 2.9599e+14 723218
## - as.factor(char_bsmt_fin)       2 6.6791e+11 2.9602e+14 723219
## - char_fbath                     1 8.6016e+11 2.9621e+14 723242
## - char_hbath                     1 8.9864e+11 2.9625e+14 723246
## - as.factor(geo_ohare_noise)     1 1.1806e+12 2.9653e+14 723276
## - as.factor(char_bsmt)           3 1.4970e+12 2.9685e+14 723305
## - as.factor(char_air)            1 2.2926e+12 2.9765e+14 723394
## - as.factor(char_type_resd)      8 3.2543e+12 2.9861e+14 723481
## - char_bldg_sf                   1 1.4268e+13 3.0962e+14 724636
## - econ_tax_rate                  1 2.8380e+13 3.2373e+14 726039
## - econ_midincome                 1 1.2910e+14 4.2446e+14 734569
## 
## Step:  AIC=723151
## sale_price ~ char_rooms + char_frpl + char_bldg_sf + econ_tax_rate + 
##     char_age + char_fbath + econ_midincome + as.factor(geo_floodplain) + 
##     as.factor(geo_withinmr100) + as.factor(geo_ohare_noise) + 
##     as.factor(char_heat) + as.factor(char_gar1_att) + as.factor(char_bsmt) + 
##     char_beds + char_hbath + char_hd_sf + geo_fs_flood_factor + 
##     as.factor(geo_withinmr101300) + as.factor(ind_garage) + as.factor(char_ext_wall) + 
##     as.factor(char_type_resd) + as.factor(char_roof_cnst) + as.factor(char_oheat) + 
##     as.factor(char_gar1_size) + as.factor(char_repair_cnd) + 
##     as.factor(char_bsmt_fin) + as.factor(char_air)
## 
##                                 Df  Sum of Sq        RSS    AIC
## <none>                                        2.9536e+14 723151
## - as.factor(ind_garage)          1 2.1116e+10 2.9538e+14 723151
## - as.factor(char_heat)           3 7.2798e+10 2.9543e+14 723153
## - as.factor(geo_withinmr101300)  1 4.4383e+10 2.9541e+14 723154
## - as.factor(char_gar1_att)       1 6.1077e+10 2.9542e+14 723155
## - char_rooms                     1 7.2032e+10 2.9543e+14 723157
## - as.factor(char_roof_cnst)      5 1.5187e+11 2.9551e+14 723157
## - as.factor(char_repair_cnd)     2 1.6169e+11 2.9552e+14 723164
## - as.factor(char_oheat)          1 1.5554e+11 2.9552e+14 723166
## - char_hd_sf                     1 1.5671e+11 2.9552e+14 723166
## - as.factor(geo_floodplain)      1 1.6347e+11 2.9552e+14 723166
## - char_beds                      1 1.7023e+11 2.9553e+14 723167
## - as.factor(geo_withinmr100)     1 2.1572e+11 2.9558e+14 723172
## - as.factor(char_ext_wall)       3 3.7939e+11 2.9574e+14 723185
## - as.factor(char_gar1_size)      7 6.4216e+11 2.9600e+14 723205
## - char_frpl                      1 5.3893e+11 2.9590e+14 723206
## - char_age                       1 6.3805e+11 2.9600e+14 723217
## - as.factor(char_bsmt_fin)       2 6.6642e+11 2.9603e+14 723218
## - char_fbath                     1 8.6034e+11 2.9622e+14 723241
## - char_hbath                     1 8.9825e+11 2.9626e+14 723245
## - geo_fs_flood_factor            1 9.4283e+11 2.9630e+14 723249
## - as.factor(geo_ohare_noise)     1 1.1818e+12 2.9654e+14 723275
## - as.factor(char_bsmt)           3 1.4959e+12 2.9686e+14 723304
## - as.factor(char_air)            1 2.2954e+12 2.9766e+14 723393
## - as.factor(char_type_resd)      8 3.2641e+12 2.9863e+14 723481
## - char_bldg_sf                   1 1.4267e+13 3.0963e+14 724634
## - econ_tax_rate                  1 2.8373e+13 3.2373e+14 726037
## - econ_midincome                 1 1.2915e+14 4.2451e+14 734572
# Coefficient table and fit statistics of the backward-selected model.
summary(lm.step.backward)
## 
## Call:
## lm(formula = sale_price ~ char_rooms + char_frpl + char_bldg_sf + 
##     econ_tax_rate + char_age + char_fbath + econ_midincome + 
##     as.factor(geo_floodplain) + as.factor(geo_withinmr100) + 
##     as.factor(geo_ohare_noise) + as.factor(char_heat) + as.factor(char_gar1_att) + 
##     as.factor(char_bsmt) + char_beds + char_hbath + char_hd_sf + 
##     geo_fs_flood_factor + as.factor(geo_withinmr101300) + as.factor(ind_garage) + 
##     as.factor(char_ext_wall) + as.factor(char_type_resd) + as.factor(char_roof_cnst) + 
##     as.factor(char_oheat) + as.factor(char_gar1_size) + as.factor(char_repair_cnd) + 
##     as.factor(char_bsmt_fin) + as.factor(char_air), data = df_trn_y)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -438080  -63607   -5888   53186  508253 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     5.932e+04  2.451e+04   2.420 0.015508 *  
## char_rooms                     -1.558e+03  5.626e+02  -2.769 0.005629 ** 
## char_frpl                       9.993e+03  1.319e+03   7.574 3.73e-14 ***
## char_bldg_sf                    7.239e+01  1.858e+00  38.968  < 2e-16 ***
## econ_tax_rate                  -6.870e+03  1.250e+02 -54.952  < 2e-16 ***
## char_age                        2.286e+02  2.774e+01   8.241  < 2e-16 ***
## char_fbath                      1.262e+04  1.319e+03   9.569  < 2e-16 ***
## econ_midincome                  2.419e+00  2.063e-02 117.244  < 2e-16 ***
## as.factor(geo_floodplain)1     -2.188e+04  5.246e+03  -4.171 3.04e-05 ***
## as.factor(geo_withinmr100)1    -3.201e+04  6.680e+03  -4.792 1.66e-06 ***
## as.factor(geo_ohare_noise)1     5.115e+04  4.561e+03  11.215  < 2e-16 ***
## as.factor(char_heat)2           2.636e+03  1.776e+03   1.485 0.137679    
## as.factor(char_heat)3          -1.666e+04  8.508e+03  -1.958 0.050245 .  
## as.factor(char_heat)4          -1.412e+04  1.212e+04  -1.165 0.243969    
## as.factor(char_gar1_att)2       4.385e+03  1.720e+03   2.550 0.010788 *  
## as.factor(char_bsmt)2          -1.699e+04  1.907e+03  -8.912  < 2e-16 ***
## as.factor(char_bsmt)3          -1.178e+04  1.795e+03  -6.566 5.26e-11 ***
## as.factor(char_bsmt)4          -2.406e+04  2.464e+03  -9.762  < 2e-16 ***
## char_beds                      -4.465e+03  1.049e+03  -4.256 2.08e-05 ***
## char_hbath                      1.208e+04  1.235e+03   9.778  < 2e-16 ***
## char_hd_sf                      2.668e-01  6.534e-02   4.084 4.44e-05 ***
## geo_fs_flood_factor             3.417e+03  3.411e+02  10.017  < 2e-16 ***
## as.factor(geo_withinmr101300)1 -5.003e+03  2.302e+03  -2.173 0.029756 *  
## as.factor(ind_garage)1         -3.463e+04  2.310e+04  -1.499 0.133846    
## as.factor(char_ext_wall)2       2.868e+03  1.441e+03   1.990 0.046562 *  
## as.factor(char_ext_wall)3       2.753e+03  1.585e+03   1.737 0.082440 .  
## as.factor(char_ext_wall)4       2.940e+04  4.708e+03   6.244 4.32e-10 ***
## as.factor(char_type_resd)2     -4.154e+03  1.707e+03  -2.433 0.014991 *  
## as.factor(char_type_resd)3      8.120e+04  4.951e+03  16.402  < 2e-16 ***
## as.factor(char_type_resd)4      6.562e+03  2.638e+03   2.488 0.012848 *  
## as.factor(char_type_resd)5     -1.118e+03  2.009e+03  -0.556 0.577985    
## as.factor(char_type_resd)6      3.966e+04  3.248e+04   1.221 0.221957    
## as.factor(char_type_resd)7      6.939e+04  2.694e+04   2.576 0.010008 *  
## as.factor(char_type_resd)8      4.342e+04  3.668e+04   1.184 0.236531    
## as.factor(char_type_resd)9      4.871e+04  9.697e+04   0.502 0.615439    
## as.factor(char_roof_cnst)2     -7.096e+02  2.604e+03  -0.272 0.785242    
## as.factor(char_roof_cnst)3     -1.203e+04  1.207e+04  -0.997 0.318993    
## as.factor(char_roof_cnst)4      1.483e+04  8.838e+03   1.678 0.093283 .  
## as.factor(char_roof_cnst)5     -9.702e+03  8.027e+03  -1.209 0.226819    
## as.factor(char_roof_cnst)6      2.999e+04  9.174e+03   3.270 0.001078 ** 
## as.factor(char_oheat)5          1.216e+04  2.989e+03   4.069 4.74e-05 ***
## as.factor(char_gar1_size)2      2.176e+03  2.598e+03   0.838 0.402293    
## as.factor(char_gar1_size)3      9.949e+03  1.638e+03   6.073 1.27e-09 ***
## as.factor(char_gar1_size)4      3.349e+03  3.122e+03   1.073 0.283400    
## as.factor(char_gar1_size)5      2.327e+04  4.487e+03   5.186 2.16e-07 ***
## as.factor(char_gar1_size)6      4.043e+04  1.629e+04   2.482 0.013084 *  
## as.factor(char_gar1_size)7     -4.906e+04  2.321e+04  -2.114 0.034528 *  
## as.factor(char_gar1_size)8     -6.409e+03  1.726e+04  -0.371 0.710373    
## as.factor(char_repair_cnd)2    -2.660e+04  6.633e+03  -4.010 6.10e-05 ***
## as.factor(char_repair_cnd)3    -1.788e+04  1.034e+04  -1.730 0.083684 .  
## as.factor(char_bsmt_fin)2      -1.974e+04  5.267e+03  -3.748 0.000179 ***
## as.factor(char_bsmt_fin)3      -1.209e+04  1.485e+03  -8.142 4.04e-16 ***
## as.factor(char_air)2           -2.136e+04  1.366e+03 -15.630  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 96930 on 31436 degrees of freedom
## Multiple R-squared:  0.5768, Adjusted R-squared:  0.5761 
## F-statistic:   824 on 52 and 31436 DF,  p-value: < 2.2e-16
# Predict sale prices for the rows of df_tst_y (presumably the held-out test
# split) using the lm.step.backward model, then preview the first predictions.
lm.step.pred.backward <- predict(lm.step.backward, newdata = df_tst_y)
head(lm.step.pred.backward)
##         3         9        15        20        22        23 
##  57299.74 191440.69 195804.08  91136.73 207192.23 255702.49
# Mean squared error: average of squared (observed - predicted) sale prices
# over df_tst_y for the backward-selection predictions.
mean((df_tst_y$sale_price - lm.step.pred.backward)^2)
## [1] 9140828696

Forward Selection

# Forward selection starts from an intercept-only model fitted on df_trn_y.
lm.null <- lm(sale_price ~ 1, data = df_trn_y)

# step() then adds one term at a time, never dropping below lm.null and never
# going beyond the terms of lm_full_y. The AIC trace for each step is printed.
lm.step.forward <- step(
  lm.null,
  scope = list(lower = lm.null, upper = lm_full_y),
  direction = "forward"
)
## Start:  AIC=750126.8
## sale_price ~ 1
## 
##                                 Df  Sum of Sq        RSS    AIC
## + econ_midincome                 1 2.5372e+14 4.4425e+14 735902
## + char_bldg_sf                   1 1.4292e+14 5.5505e+14 742914
## + char_frpl                      1 8.5720e+13 6.1225e+14 746003
## + char_fbath                     1 7.8961e+13 6.1901e+14 746348
## + as.factor(char_type_resd)      8 6.7075e+13 6.3090e+14 746961
## + econ_tax_rate                  1 6.4125e+13 6.3384e+14 747094
## + char_rooms                     1 5.5774e+13 6.4220e+14 747506
## + as.factor(char_air)            1 4.8489e+13 6.4948e+14 747861
## + as.factor(char_gar1_att)       1 4.6900e+13 6.5107e+14 747938
## + char_beds                      1 4.2273e+13 6.5570e+14 748161
## + char_hbath                     1 4.1800e+13 6.5617e+14 748184
## + as.factor(char_gar1_size)      7 3.5539e+13 6.6243e+14 748495
## + as.factor(char_gar1_area)      1 2.6488e+13 6.7148e+14 748910
## + as.factor(char_bsmt)           3 1.5156e+13 6.8281e+14 749441
## + as.factor(ind_garage)          1 1.2652e+13 6.8532e+14 749553
## + as.factor(char_ext_wall)       3 1.2108e+13 6.8586e+14 749582
## + char_age                       1 1.1394e+13 6.8658e+14 749610
## + char_hd_sf                     1 1.0628e+13 6.8734e+14 749646
## + as.factor(char_roof_cnst)      5 7.3541e+12 6.9062e+14 749803
## + as.factor(char_repair_cnd)     2 5.2611e+12 6.9271e+14 749893
## + as.factor(geo_ohare_noise)     1 2.6179e+12 6.9535e+14 750010
## + as.factor(char_oheat)          1 2.5400e+12 6.9543e+14 750014
## + as.factor(char_attic_type)     2 1.9325e+12 6.9604e+14 750043
## + as.factor(geo_floodplain)      1 1.3899e+12 6.9658e+14 750066
## + as.factor(char_bsmt_fin)       2 1.4327e+12 6.9654e+14 750066
## + as.factor(char_heat)           3 7.4104e+11 6.9723e+14 750099
## + as.factor(char_tp_plan)        1 2.8532e+11 6.9768e+14 750116
## + as.factor(geo_withinmr100)     1 2.4282e+11 6.9773e+14 750118
## + geo_fs_flood_risk_direction    1 1.3227e+11 6.9784e+14 750123
## <none>                                        6.9797e+14 750127
## + as.factor(geo_withinmr101300)  1 1.2405e+10 6.9796e+14 750128
## + geo_fs_flood_factor            1 1.0584e+10 6.9796e+14 750128
## 
## Step:  AIC=735902.1
## sale_price ~ econ_midincome
## 
##                                 Df  Sum of Sq        RSS    AIC
## + char_bldg_sf                   1 9.4030e+13 3.5022e+14 728415
## + char_fbath                     1 5.8676e+13 3.8557e+14 731443
## + char_rooms                     1 5.1653e+13 3.9259e+14 732012
## + as.factor(char_type_resd)      8 4.9470e+13 3.9478e+14 732200
## + econ_tax_rate                  1 4.7183e+13 3.9706e+14 732368
## + char_beds                      1 4.2435e+13 4.0181e+14 732743
## + char_frpl                      1 1.6023e+13 4.2822e+14 734747
## + as.factor(char_gar1_size)      7 1.4072e+13 4.3017e+14 734902
## + as.factor(char_roof_cnst)      5 1.0956e+13 4.3329e+14 735126
## + as.factor(char_bsmt)           3 8.7456e+12 4.3550e+14 735282
## + char_hbath                     1 7.2679e+12 4.3698e+14 735385
## + as.factor(char_gar1_area)      1 7.2120e+12 4.3703e+14 735389
## + as.factor(char_air)            1 6.6075e+12 4.3764e+14 735432
## + as.factor(char_ext_wall)       3 4.2350e+12 4.4001e+14 735606
## + as.factor(char_heat)           3 3.9850e+12 4.4026e+14 735624
## + as.factor(char_bsmt_fin)       2 2.6687e+12 4.4158e+14 735716
## + as.factor(char_gar1_att)       1 2.3173e+12 4.4193e+14 735739
## + as.factor(char_repair_cnd)     2 1.7657e+12 4.4248e+14 735781
## + geo_fs_flood_factor            1 1.3244e+12 4.4292e+14 735810
## + as.factor(geo_ohare_noise)     1 1.0550e+12 4.4319e+14 735829
## + as.factor(ind_garage)          1 9.9074e+11 4.4325e+14 735834
## + as.factor(geo_floodplain)      1 9.2555e+11 4.4332e+14 735838
## + as.factor(char_attic_type)     2 9.0547e+11 4.4334e+14 735842
## + char_age                       1 5.9083e+11 4.4365e+14 735862
## + geo_fs_flood_risk_direction    1 5.7307e+11 4.4367e+14 735863
## + char_hd_sf                     1 2.6597e+11 4.4398e+14 735885
## + as.factor(char_oheat)          1 1.4755e+11 4.4410e+14 735894
## + as.factor(char_tp_plan)        1 5.7251e+10 4.4419e+14 735900
## <none>                                        4.4425e+14 735902
## + as.factor(geo_withinmr101300)  1 9.3955e+08 4.4424e+14 735904
## + as.factor(geo_withinmr100)     1 5.4272e+07 4.4425e+14 735904
## 
## Step:  AIC=728415.1
## sale_price ~ econ_midincome + char_bldg_sf
## 
##                                 Df  Sum of Sq        RSS    AIC
## + econ_tax_rate                  1 3.3160e+13 3.1705e+14 725285
## + as.factor(char_bsmt)           3 4.9358e+12 3.4528e+14 727974
## + as.factor(char_type_resd)      8 4.7754e+12 3.4544e+14 727999
## + as.factor(char_ext_wall)       3 2.9572e+12 3.4726e+14 728154
## + as.factor(char_bsmt_fin)       2 2.2284e+12 3.4799e+14 728218
## + as.factor(char_gar1_size)      7 2.2364e+12 3.4798e+14 728227
## + as.factor(char_air)            1 2.0116e+12 3.4820e+14 728236
## + as.factor(geo_ohare_noise)     1 1.9154e+12 3.4830e+14 728244
## + char_fbath                     1 1.1428e+12 3.4907e+14 728314
## + as.factor(ind_garage)          1 1.0892e+12 3.4913e+14 728319
## + geo_fs_flood_factor            1 9.0750e+11 3.4931e+14 728335
## + char_age                       1 8.3363e+11 3.4938e+14 728342
## + char_frpl                      1 7.6614e+11 3.4945e+14 728348
## + as.factor(char_roof_cnst)      5 7.4621e+11 3.4947e+14 728358
## + char_hbath                     1 5.8763e+11 3.4963e+14 728364
## + as.factor(geo_floodplain)      1 4.8238e+11 3.4973e+14 728374
## + as.factor(char_attic_type)     2 4.2776e+11 3.4979e+14 728381
## + char_beds                      1 3.8386e+11 3.4983e+14 728383
## + geo_fs_flood_risk_direction    1 3.7351e+11 3.4984e+14 728383
## + as.factor(char_gar1_att)       1 3.3597e+11 3.4988e+14 728387
## + as.factor(char_repair_cnd)     2 3.5341e+11 3.4986e+14 728387
## + as.factor(char_heat)           3 3.3975e+11 3.4988e+14 728391
## + as.factor(char_oheat)          1 1.6263e+11 3.5005e+14 728402
## + char_hd_sf                     1 1.4554e+11 3.5007e+14 728404
## + as.factor(geo_withinmr100)     1 1.1293e+11 3.5010e+14 728407
## + as.factor(char_gar1_area)      1 8.6624e+10 3.5013e+14 728409
## + char_rooms                     1 6.5671e+10 3.5015e+14 728411
## <none>                                        3.5022e+14 728415
## + as.factor(char_tp_plan)        1 2.5605e+09 3.5021e+14 728417
## + as.factor(geo_withinmr101300)  1 1.0383e+09 3.5021e+14 728417
## 
## Step:  AIC=725284.8
## sale_price ~ econ_midincome + char_bldg_sf + econ_tax_rate
## 
##                                 Df  Sum of Sq        RSS    AIC
## + as.factor(char_air)            1 5.3171e+12 3.1174e+14 724754
## + as.factor(char_gar1_size)      7 4.2587e+12 3.1280e+14 724873
## + as.factor(char_type_resd)      8 3.7619e+12 3.1329e+14 724925
## + as.factor(char_bsmt_fin)       2 3.4546e+12 3.1360e+14 724944
## + as.factor(char_bsmt)           3 2.8879e+12 3.1417e+14 725003
## + as.factor(ind_garage)          1 2.4315e+12 3.1462e+14 725044
## + char_hbath                     1 2.1677e+12 3.1489e+14 725071
## + char_frpl                      1 1.8687e+12 3.1519e+14 725101
## + as.factor(char_ext_wall)       3 1.3496e+12 3.1571e+14 725156
## + as.factor(geo_ohare_noise)     1 1.1943e+12 3.1586e+14 725168
## + char_fbath                     1 6.9832e+11 3.1636e+14 725217
## + geo_fs_flood_factor            1 6.2733e+11 3.1643e+14 725224
## + char_beds                      1 5.5430e+11 3.1650e+14 725232
## + as.factor(char_gar1_area)      1 5.3881e+11 3.1652e+14 725233
## + char_age                       1 4.4059e+11 3.1661e+14 725243
## + as.factor(char_oheat)          1 3.4221e+11 3.1671e+14 725253
## + as.factor(char_gar1_att)       1 3.2537e+11 3.1673e+14 725254
## + as.factor(char_repair_cnd)     2 3.1412e+11 3.1674e+14 725258
## + char_rooms                     1 1.9317e+11 3.1686e+14 725268
## + as.factor(geo_withinmr100)     1 1.9009e+11 3.1686e+14 725268
## + geo_fs_flood_risk_direction    1 1.7770e+11 3.1688e+14 725269
## + as.factor(char_roof_cnst)      5 2.0418e+11 3.1685e+14 725274
## + as.factor(geo_floodplain)      1 1.0750e+11 3.1695e+14 725276
## + char_hd_sf                     1 9.0262e+10 3.1696e+14 725278
## + as.factor(geo_withinmr101300)  1 5.4797e+10 3.1700e+14 725281
## + as.factor(char_attic_type)     2 6.0917e+10 3.1699e+14 725283
## <none>                                        3.1705e+14 725285
## + as.factor(char_heat)           3 5.5903e+10 3.1700e+14 725285
## + as.factor(char_tp_plan)        1 2.6858e+09 3.1705e+14 725286
## 
## Step:  AIC=724754.2
## sale_price ~ econ_midincome + char_bldg_sf + econ_tax_rate + 
##     as.factor(char_air)
## 
##                                 Df  Sum of Sq        RSS    AIC
## + as.factor(char_type_resd)      8 2.9618e+12 3.0878e+14 724470
## + as.factor(char_gar1_size)      7 2.9328e+12 3.0880e+14 724471
## + as.factor(char_bsmt)           3 2.7466e+12 3.0899e+14 724482
## + as.factor(char_bsmt_fin)       2 2.1974e+12 3.0954e+14 724535
## + as.factor(ind_garage)          1 1.7308e+12 3.1001e+14 724581
## + as.factor(char_ext_wall)       3 1.2950e+12 3.1044e+14 724629
## + as.factor(geo_ohare_noise)     1 1.0746e+12 3.1066e+14 724647
## + char_frpl                      1 9.3918e+11 3.1080e+14 724661
## + char_hbath                     1 8.6779e+11 3.1087e+14 724668
## + geo_fs_flood_factor            1 7.6459e+11 3.1097e+14 724679
## + char_fbath                     1 5.7712e+11 3.1116e+14 724698
## + as.factor(char_heat)           3 5.1420e+11 3.1122e+14 724708
## + geo_fs_flood_risk_direction    1 2.7650e+11 3.1146e+14 724728
## + as.factor(char_repair_cnd)     2 2.6229e+11 3.1148e+14 724732
## + as.factor(char_attic_type)     2 2.4575e+11 3.1149e+14 724733
## + as.factor(char_oheat)          1 2.0011e+11 3.1154e+14 724736
## + as.factor(geo_withinmr100)     1 1.7473e+11 3.1156e+14 724739
## + as.factor(char_roof_cnst)      5 2.3503e+11 3.1150e+14 724740
## + char_age                       1 1.3316e+11 3.1160e+14 724743
## + char_beds                      1 1.1017e+11 3.1163e+14 724745
## + as.factor(geo_floodplain)      1 9.1265e+10 3.1165e+14 724747
## + char_hd_sf                     1 8.3658e+10 3.1165e+14 724748
## + as.factor(char_gar1_area)      1 7.1830e+10 3.1167e+14 724749
## + as.factor(geo_withinmr101300)  1 3.9293e+10 3.1170e+14 724752
## <none>                                        3.1174e+14 724754
## + as.factor(char_gar1_att)       1 1.9126e+10 3.1172e+14 724754
## + char_rooms                     1 6.9024e+09 3.1173e+14 724755
## + as.factor(char_tp_plan)        1 4.7630e+08 3.1174e+14 724756
## 
## Step:  AIC=724469.6
## sale_price ~ econ_midincome + char_bldg_sf + econ_tax_rate + 
##     as.factor(char_air) + as.factor(char_type_resd)
## 
##                                 Df  Sum of Sq        RSS    AIC
## + as.factor(char_bsmt)           3 3.6395e+12 3.0514e+14 724102
## + as.factor(char_gar1_size)      7 2.9259e+12 3.0585e+14 724184
## + as.factor(char_bsmt_fin)       2 2.1517e+12 3.0662e+14 724253
## + as.factor(ind_garage)          1 1.6820e+12 3.0709e+14 724300
## + as.factor(geo_ohare_noise)     1 1.1138e+12 3.0766e+14 724358
## + as.factor(char_ext_wall)       3 1.1371e+12 3.0764e+14 724359
## + char_frpl                      1 1.0448e+12 3.0773e+14 724365
## + char_hbath                     1 7.8293e+11 3.0799e+14 724392
## + geo_fs_flood_factor            1 6.9041e+11 3.0809e+14 724401
## + as.factor(char_heat)           3 5.4658e+11 3.0823e+14 724420
## + char_fbath                     1 3.8045e+11 3.0840e+14 724433
## + char_age                       1 3.5654e+11 3.0842e+14 724435
## + geo_fs_flood_risk_direction    1 2.7353e+11 3.0850e+14 724444
## + as.factor(char_attic_type)     2 2.5509e+11 3.0852e+14 724448
## + as.factor(geo_withinmr100)     1 2.1013e+11 3.0857e+14 724450
## + as.factor(char_repair_cnd)     2 2.2768e+11 3.0855e+14 724450
## + as.factor(char_oheat)          1 1.7718e+11 3.0860e+14 724454
## + char_hd_sf                     1 1.3250e+11 3.0864e+14 724458
## + char_beds                      1 1.0741e+11 3.0867e+14 724461
## + as.factor(char_roof_cnst)      5 1.8088e+11 3.0859e+14 724461
## + as.factor(char_gar1_att)       1 8.7261e+10 3.0869e+14 724463
## + as.factor(geo_floodplain)      1 8.3144e+10 3.0869e+14 724463
## + as.factor(geo_withinmr101300)  1 4.2508e+10 3.0873e+14 724467
## + char_rooms                     1 4.1057e+10 3.0873e+14 724467
## <none>                                        3.0878e+14 724470
## + as.factor(char_tp_plan)        1 1.0701e+09 3.0877e+14 724471
## + as.factor(char_gar1_area)      1 3.4964e+07 3.0878e+14 724472
## 
## Step:  AIC=724102.2
## sale_price ~ econ_midincome + char_bldg_sf + econ_tax_rate + 
##     as.factor(char_air) + as.factor(char_type_resd) + as.factor(char_bsmt)
## 
##                                 Df  Sum of Sq        RSS    AIC
## + as.factor(char_gar1_size)      7 2.3452e+12 3.0279e+14 723873
## + as.factor(ind_garage)          1 1.6690e+12 3.0347e+14 723932
## + as.factor(char_bsmt_fin)       2 1.1495e+12 3.0399e+14 723987
## + as.factor(geo_ohare_noise)     1 1.0500e+12 3.0409e+14 723996
## + char_frpl                      1 9.8868e+11 3.0415e+14 724002
## + geo_fs_flood_factor            1 7.2699e+11 3.0441e+14 724029
## + char_hbath                     1 7.1328e+11 3.0442e+14 724031
## + as.factor(char_ext_wall)       3 5.0737e+11 3.0463e+14 724056
## + char_fbath                     1 3.3688e+11 3.0480e+14 724069
## + geo_fs_flood_risk_direction    1 2.7435e+11 3.0486e+14 724076
## + char_hd_sf                     1 2.4949e+11 3.0489e+14 724078
## + char_beds                      1 2.2394e+11 3.0491e+14 724081
## + as.factor(char_repair_cnd)     2 2.2005e+11 3.0492e+14 724083
## + as.factor(geo_withinmr100)     1 1.9951e+11 3.0494e+14 724084
## + char_rooms                     1 1.8704e+11 3.0495e+14 724085
## + as.factor(char_heat)           3 2.2012e+11 3.0492e+14 724085
## + char_age                       1 1.5729e+11 3.0498e+14 724088
## + as.factor(char_roof_cnst)      5 2.0946e+11 3.0493e+14 724091
## + as.factor(char_oheat)          1 1.2032e+11 3.0502e+14 724092
## + as.factor(char_attic_type)     2 9.6079e+10 3.0504e+14 724096
## + as.factor(geo_floodplain)      1 6.6851e+10 3.0507e+14 724097
## + as.factor(geo_withinmr101300)  1 5.2572e+10 3.0508e+14 724099
## <none>                                        3.0514e+14 724102
## + as.factor(char_gar1_area)      1 1.1500e+10 3.0512e+14 724103
## + as.factor(char_gar1_att)       1 9.2958e+08 3.0514e+14 724104
## + as.factor(char_tp_plan)        1 1.2769e+07 3.0514e+14 724104
## 
## Step:  AIC=723873.3
## sale_price ~ econ_midincome + char_bldg_sf + econ_tax_rate + 
##     as.factor(char_air) + as.factor(char_type_resd) + as.factor(char_bsmt) + 
##     as.factor(char_gar1_size)
## 
##                                 Df  Sum of Sq        RSS    AIC
## + as.factor(geo_ohare_noise)     1 1.0422e+12 3.0175e+14 723767
## + as.factor(char_bsmt_fin)       2 1.0490e+12 3.0174e+14 723768
## + char_frpl                      1 7.9733e+11 3.0199e+14 723792
## + geo_fs_flood_factor            1 7.7110e+11 3.0202e+14 723795
## + char_hbath                     1 6.3614e+11 3.0215e+14 723809
## + as.factor(char_ext_wall)       3 4.7425e+11 3.0232e+14 723830
## + char_age                       1 3.6793e+11 3.0242e+14 723837
## + char_fbath                     1 3.5938e+11 3.0243e+14 723838
## + geo_fs_flood_risk_direction    1 2.8255e+11 3.0251e+14 723846
## + as.factor(char_repair_cnd)     2 2.0780e+11 3.0258e+14 723856
## + char_hd_sf                     1 1.5803e+11 3.0263e+14 723859
## + as.factor(geo_withinmr100)     1 1.4863e+11 3.0264e+14 723860
## + char_beds                      1 1.4755e+11 3.0264e+14 723860
## + as.factor(char_heat)           3 1.7736e+11 3.0261e+14 723861
## + char_rooms                     1 1.3809e+11 3.0265e+14 723861
## + as.factor(char_oheat)          1 1.1010e+11 3.0268e+14 723864
## + as.factor(char_roof_cnst)      5 1.7080e+11 3.0262e+14 723865
## + as.factor(geo_floodplain)      1 6.5950e+10 3.0273e+14 723868
## + as.factor(char_attic_type)     2 8.4423e+10 3.0271e+14 723868
## + as.factor(char_gar1_att)       1 4.2371e+10 3.0275e+14 723871
## + as.factor(geo_withinmr101300)  1 3.5091e+10 3.0276e+14 723872
## + as.factor(ind_garage)          1 2.5531e+10 3.0277e+14 723873
## <none>                                        3.0279e+14 723873
## + as.factor(char_gar1_area)      1 7.1562e+09 3.0278e+14 723875
## + as.factor(char_tp_plan)        1 5.0742e+08 3.0279e+14 723875
## 
## Step:  AIC=723766.7
## sale_price ~ econ_midincome + char_bldg_sf + econ_tax_rate + 
##     as.factor(char_air) + as.factor(char_type_resd) + as.factor(char_bsmt) + 
##     as.factor(char_gar1_size) + as.factor(geo_ohare_noise)
## 
##                                 Df  Sum of Sq        RSS    AIC
## + as.factor(char_bsmt_fin)       2 1.0262e+12 3.0072e+14 723663
## + geo_fs_flood_factor            1 8.1895e+11 3.0093e+14 723683
## + char_frpl                      1 8.1783e+11 3.0093e+14 723683
## + char_hbath                     1 6.2422e+11 3.0112e+14 723703
## + as.factor(char_ext_wall)       3 4.7386e+11 3.0127e+14 723723
## + char_age                       1 3.9499e+11 3.0135e+14 723727
## + char_fbath                     1 3.8338e+11 3.0137e+14 723729
## + geo_fs_flood_risk_direction    1 3.0635e+11 3.0144e+14 723737
## + as.factor(char_repair_cnd)     2 2.1649e+11 3.0153e+14 723748
## + char_hd_sf                     1 1.6478e+11 3.0158e+14 723751
## + as.factor(geo_withinmr100)     1 1.6439e+11 3.0158e+14 723752
## + char_beds                      1 1.5143e+11 3.0160e+14 723753
## + as.factor(char_heat)           3 1.8123e+11 3.0157e+14 723754
## + char_rooms                     1 1.3310e+11 3.0162e+14 723755
## + as.factor(char_oheat)          1 1.1421e+11 3.0163e+14 723757
## + as.factor(char_roof_cnst)      5 1.7350e+11 3.0158e+14 723759
## + as.factor(char_attic_type)     2 8.4666e+10 3.0166e+14 723762
## + as.factor(geo_floodplain)      1 6.1899e+10 3.0169e+14 723762
## + as.factor(geo_withinmr101300)  1 4.0127e+10 3.0171e+14 723765
## + as.factor(char_gar1_att)       1 3.5550e+10 3.0171e+14 723765
## + as.factor(ind_garage)          1 2.7422e+10 3.0172e+14 723766
## <none>                                        3.0175e+14 723767
## + as.factor(char_gar1_area)      1 5.1207e+09 3.0174e+14 723768
## + as.factor(char_tp_plan)        1 6.4289e+08 3.0175e+14 723769
## 
## Step:  AIC=723663.4
## sale_price ~ econ_midincome + char_bldg_sf + econ_tax_rate + 
##     as.factor(char_air) + as.factor(char_type_resd) + as.factor(char_bsmt) + 
##     as.factor(char_gar1_size) + as.factor(geo_ohare_noise) + 
##     as.factor(char_bsmt_fin)
## 
##                                 Df  Sum of Sq        RSS    AIC
## + geo_fs_flood_factor            1 8.4056e+11 2.9988e+14 723577
## + char_frpl                      1 7.5758e+11 2.9997e+14 723586
## + char_hbath                     1 4.5611e+11 3.0027e+14 723618
## + as.factor(char_ext_wall)       3 4.7786e+11 3.0024e+14 723619
## + char_age                       1 4.2981e+11 3.0029e+14 723620
## + geo_fs_flood_risk_direction    1 3.0526e+11 3.0042e+14 723633
## + char_fbath                     1 2.9027e+11 3.0043e+14 723635
## + as.factor(char_repair_cnd)     2 2.2687e+11 3.0050e+14 723644
## + char_beds                      1 1.9125e+11 3.0053e+14 723645
## + char_hd_sf                     1 1.6532e+11 3.0056e+14 723648
## + as.factor(geo_withinmr100)     1 1.5777e+11 3.0056e+14 723649
## + char_rooms                     1 1.4553e+11 3.0058e+14 723650
## + as.factor(char_oheat)          1 1.3124e+11 3.0059e+14 723652
## + as.factor(char_heat)           3 1.5656e+11 3.0057e+14 723653
## + as.factor(char_roof_cnst)      5 1.8243e+11 3.0054e+14 723654
## + as.factor(geo_floodplain)      1 5.4121e+10 3.0067e+14 723660
## + as.factor(char_attic_type)     2 6.5331e+10 3.0066e+14 723661
## + as.factor(ind_garage)          1 3.5910e+10 3.0069e+14 723662
## + as.factor(geo_withinmr101300)  1 3.5345e+10 3.0069e+14 723662
## + as.factor(char_gar1_att)       1 3.0213e+10 3.0069e+14 723662
## <none>                                        3.0072e+14 723663
## + as.factor(char_gar1_area)      1 2.7120e+09 3.0072e+14 723665
## + as.factor(char_tp_plan)        1 3.6310e+08 3.0072e+14 723665
## 
## Step:  AIC=723577.3
## sale_price ~ econ_midincome + char_bldg_sf + econ_tax_rate + 
##     as.factor(char_air) + as.factor(char_type_resd) + as.factor(char_bsmt) + 
##     as.factor(char_gar1_size) + as.factor(geo_ohare_noise) + 
##     as.factor(char_bsmt_fin) + geo_fs_flood_factor
## 
##                                 Df  Sum of Sq        RSS    AIC
## + char_frpl                      1 7.9572e+11 2.9909e+14 723496
## + char_hbath                     1 4.6604e+11 2.9942e+14 723530
## + as.factor(char_ext_wall)       3 4.9381e+11 2.9939e+14 723531
## + char_age                       1 3.8140e+11 2.9950e+14 723539
## + char_fbath                     1 2.6914e+11 2.9961e+14 723551
## + as.factor(char_repair_cnd)     2 2.2788e+11 2.9965e+14 723557
## + char_beds                      1 2.0296e+11 2.9968e+14 723558
## + as.factor(geo_withinmr100)     1 1.7487e+11 2.9971e+14 723561
## + char_hd_sf                     1 1.7321e+11 2.9971e+14 723561
## + as.factor(geo_floodplain)      1 1.7115e+11 2.9971e+14 723561
## + char_rooms                     1 1.5407e+11 2.9973e+14 723563
## + as.factor(char_oheat)          1 1.4345e+11 2.9974e+14 723564
## + as.factor(char_heat)           3 1.6538e+11 2.9972e+14 723566
## + as.factor(char_roof_cnst)      5 1.7261e+11 2.9971e+14 723569
## + as.factor(char_attic_type)     2 6.8360e+10 2.9981e+14 723574
## + as.factor(geo_withinmr101300)  1 4.1814e+10 2.9984e+14 723575
## + as.factor(ind_garage)          1 3.5986e+10 2.9985e+14 723576
## + as.factor(char_gar1_att)       1 1.9614e+10 2.9986e+14 723577
## <none>                                        2.9988e+14 723577
## + geo_fs_flood_risk_direction    1 5.4792e+09 2.9988e+14 723579
## + as.factor(char_gar1_area)      1 1.6830e+09 2.9988e+14 723579
## + as.factor(char_tp_plan)        1 2.7565e+08 2.9988e+14 723579
## 
## Step:  AIC=723495.6
## sale_price ~ econ_midincome + char_bldg_sf + econ_tax_rate + 
##     as.factor(char_air) + as.factor(char_type_resd) + as.factor(char_bsmt) + 
##     as.factor(char_gar1_size) + as.factor(geo_ohare_noise) + 
##     as.factor(char_bsmt_fin) + geo_fs_flood_factor + char_frpl
## 
##                                 Df  Sum of Sq        RSS    AIC
## + char_age                       1 5.2123e+11 2.9857e+14 723443
## + as.factor(char_ext_wall)       3 4.7720e+11 2.9861e+14 723451
## + char_hbath                     1 3.2948e+11 2.9876e+14 723463
## + char_fbath                     1 2.7103e+11 2.9882e+14 723469
## + as.factor(geo_floodplain)      1 1.7733e+11 2.9891e+14 723479
## + as.factor(char_repair_cnd)     2 1.9046e+11 2.9890e+14 723480
## + as.factor(geo_withinmr100)     1 1.5779e+11 2.9893e+14 723481
## + as.factor(char_oheat)          1 1.4311e+11 2.9894e+14 723483
## + as.factor(char_gar1_att)       1 1.3226e+11 2.9895e+14 723484
## + char_beds                      1 1.3127e+11 2.9896e+14 723484
## + as.factor(char_heat)           3 1.5832e+11 2.9893e+14 723485
## + char_hd_sf                     1 1.0968e+11 2.9898e+14 723486
## + char_rooms                     1 9.6667e+10 2.9899e+14 723487
## + as.factor(char_roof_cnst)      5 1.6967e+11 2.9892e+14 723488
## + as.factor(char_attic_type)     2 6.2953e+10 2.9902e+14 723493
## + as.factor(geo_withinmr101300)  1 3.7699e+10 2.9905e+14 723494
## + as.factor(ind_garage)          1 3.3154e+10 2.9905e+14 723494
## <none>                                        2.9909e+14 723496
## + as.factor(char_gar1_area)      1 1.8186e+10 2.9907e+14 723496
## + geo_fs_flood_risk_direction    1 4.4032e+09 2.9908e+14 723497
## + as.factor(char_tp_plan)        1 9.7360e+03 2.9909e+14 723498
## 
## Step:  AIC=723442.7
## sale_price ~ econ_midincome + char_bldg_sf + econ_tax_rate + 
##     as.factor(char_air) + as.factor(char_type_resd) + as.factor(char_bsmt) + 
##     as.factor(char_gar1_size) + as.factor(geo_ohare_noise) + 
##     as.factor(char_bsmt_fin) + geo_fs_flood_factor + char_frpl + 
##     char_age
## 
##                                 Df  Sum of Sq        RSS    AIC
## + char_hbath                     1 5.2687e+11 2.9804e+14 723389
## + as.factor(char_ext_wall)       3 3.8675e+11 2.9818e+14 723408
## + char_beds                      1 2.8301e+11 2.9828e+14 723415
## + char_fbath                     1 2.4352e+11 2.9832e+14 723419
## + char_rooms                     1 2.4223e+11 2.9832e+14 723419
## + as.factor(char_oheat)          1 1.8819e+11 2.9838e+14 723425
## + as.factor(geo_withinmr100)     1 1.6420e+11 2.9840e+14 723427
## + as.factor(char_repair_cnd)     2 1.8154e+11 2.9838e+14 723428
## + as.factor(geo_floodplain)      1 1.5727e+11 2.9841e+14 723428
## + char_hd_sf                     1 1.1987e+11 2.9845e+14 723432
## + as.factor(char_roof_cnst)      5 1.7227e+11 2.9839e+14 723435
## + as.factor(char_heat)           3 8.8240e+10 2.9848e+14 723439
## + as.factor(geo_withinmr101300)  1 3.7173e+10 2.9853e+14 723441
## + as.factor(ind_garage)          1 3.2478e+10 2.9853e+14 723441
## <none>                                        2.9857e+14 723443
## + as.factor(char_gar1_att)       1 1.3062e+10 2.9855e+14 723443
## + geo_fs_flood_risk_direction    1 7.3661e+09 2.9856e+14 723444
## + as.factor(char_gar1_area)      1 4.7645e+09 2.9856e+14 723444
## + as.factor(char_tp_plan)        1 2.2493e+08 2.9856e+14 723445
## + as.factor(char_attic_type)     2 1.3141e+10 2.9855e+14 723445
## 
## Step:  AIC=723389.1
## sale_price ~ econ_midincome + char_bldg_sf + econ_tax_rate + 
##     as.factor(char_air) + as.factor(char_type_resd) + as.factor(char_bsmt) + 
##     as.factor(char_gar1_size) + as.factor(geo_ohare_noise) + 
##     as.factor(char_bsmt_fin) + geo_fs_flood_factor + char_frpl + 
##     char_age + char_hbath
## 
##                                 Df  Sum of Sq        RSS    AIC
## + char_fbath                     1 4.9195e+11 2.9755e+14 723339
## + as.factor(char_ext_wall)       3 3.6462e+11 2.9767e+14 723357
## + char_beds                      1 2.8902e+11 2.9775e+14 723361
## + char_rooms                     1 2.3579e+11 2.9780e+14 723366
## + as.factor(char_oheat)          1 1.9407e+11 2.9784e+14 723371
## + as.factor(geo_withinmr100)     1 1.7351e+11 2.9786e+14 723373
## + as.factor(char_repair_cnd)     2 1.7758e+11 2.9786e+14 723374
## + as.factor(geo_floodplain)      1 1.5409e+11 2.9788e+14 723375
## + char_hd_sf                     1 1.1816e+11 2.9792e+14 723379
## + as.factor(char_roof_cnst)      5 1.7460e+11 2.9786e+14 723381
## + as.factor(char_heat)           3 8.2396e+10 2.9796e+14 723386
## + as.factor(geo_withinmr101300)  1 3.6038e+10 2.9800e+14 723387
## + as.factor(ind_garage)          1 3.3536e+10 2.9800e+14 723388
## + as.factor(char_gar1_att)       1 2.8872e+10 2.9801e+14 723388
## <none>                                        2.9804e+14 723389
## + geo_fs_flood_risk_direction    1 7.8528e+09 2.9803e+14 723390
## + as.factor(char_gar1_area)      1 1.9645e+09 2.9804e+14 723391
## + as.factor(char_tp_plan)        1 1.3458e+08 2.9804e+14 723391
## + as.factor(char_attic_type)     2 9.8961e+09 2.9803e+14 723392
## 
## Step:  AIC=723339.1
## sale_price ~ econ_midincome + char_bldg_sf + econ_tax_rate + 
##     as.factor(char_air) + as.factor(char_type_resd) + as.factor(char_bsmt) + 
##     as.factor(char_gar1_size) + as.factor(geo_ohare_noise) + 
##     as.factor(char_bsmt_fin) + geo_fs_flood_factor + char_frpl + 
##     char_age + char_hbath + char_fbath
## 
##                                 Df  Sum of Sq        RSS    AIC
## + char_beds                      1 5.0837e+11 2.9704e+14 723287
## + char_rooms                     1 4.1960e+11 2.9713e+14 723297
## + as.factor(char_ext_wall)       3 3.9479e+11 2.9715e+14 723303
## + as.factor(char_oheat)          1 2.2034e+11 2.9733e+14 723318
## + as.factor(geo_withinmr100)     1 1.8706e+11 2.9736e+14 723321
## + as.factor(char_repair_cnd)     2 1.7591e+11 2.9737e+14 723324
## + as.factor(geo_floodplain)      1 1.4987e+11 2.9740e+14 723325
## + char_hd_sf                     1 1.2773e+11 2.9742e+14 723328
## + as.factor(char_roof_cnst)      5 1.7115e+11 2.9738e+14 723331
## + as.factor(char_heat)           3 8.6453e+10 2.9746e+14 723336
## + as.factor(char_gar1_att)       1 4.7694e+10 2.9750e+14 723336
## + as.factor(ind_garage)          1 4.0057e+10 2.9751e+14 723337
## + as.factor(geo_withinmr101300)  1 3.6769e+10 2.9751e+14 723337
## <none>                                        2.9755e+14 723339
## + geo_fs_flood_risk_direction    1 8.3059e+09 2.9754e+14 723340
## + as.factor(char_gar1_area)      1 3.6906e+09 2.9754e+14 723341
## + as.factor(char_tp_plan)        1 2.2489e+08 2.9755e+14 723341
## + as.factor(char_attic_type)     2 1.4289e+10 2.9753e+14 723342
## 
## Step:  AIC=723287.2
## sale_price ~ econ_midincome + char_bldg_sf + econ_tax_rate + 
##     as.factor(char_air) + as.factor(char_type_resd) + as.factor(char_bsmt) + 
##     as.factor(char_gar1_size) + as.factor(geo_ohare_noise) + 
##     as.factor(char_bsmt_fin) + geo_fs_flood_factor + char_frpl + 
##     char_age + char_hbath + char_fbath + char_beds
## 
##                                 Df  Sum of Sq        RSS    AIC
## + as.factor(char_ext_wall)       3 3.9677e+11 2.9664e+14 723251
## + as.factor(geo_withinmr100)     1 2.0056e+11 2.9684e+14 723268
## + as.factor(char_oheat)          1 1.9367e+11 2.9684e+14 723269
## + as.factor(char_repair_cnd)     2 1.6448e+11 2.9687e+14 723274
## + as.factor(geo_floodplain)      1 1.4065e+11 2.9690e+14 723274
## + char_hd_sf                     1 1.3196e+11 2.9691e+14 723275
## + char_rooms                     1 8.3291e+10 2.9695e+14 723280
## + as.factor(char_roof_cnst)      5 1.5487e+11 2.9688e+14 723281
## + as.factor(char_gar1_att)       1 6.2085e+10 2.9698e+14 723283
## + as.factor(char_heat)           3 9.4265e+10 2.9694e+14 723283
## + as.factor(geo_withinmr101300)  1 4.0026e+10 2.9700e+14 723285
## + as.factor(ind_garage)          1 2.7303e+10 2.9701e+14 723286
## <none>                                        2.9704e+14 723287
## + geo_fs_flood_risk_direction    1 7.1451e+09 2.9703e+14 723288
## + as.factor(char_gar1_area)      1 2.1147e+09 2.9704e+14 723289
## + as.factor(char_tp_plan)        1 8.2909e+08 2.9704e+14 723289
## + as.factor(char_attic_type)     2 1.6133e+10 2.9702e+14 723290
## 
## Step:  AIC=723251.1
## sale_price ~ econ_midincome + char_bldg_sf + econ_tax_rate + 
##     as.factor(char_air) + as.factor(char_type_resd) + as.factor(char_bsmt) + 
##     as.factor(char_gar1_size) + as.factor(geo_ohare_noise) + 
##     as.factor(char_bsmt_fin) + geo_fs_flood_factor + char_frpl + 
##     char_age + char_hbath + char_fbath + char_beds + as.factor(char_ext_wall)
## 
##                                 Df  Sum of Sq        RSS    AIC
## + as.factor(geo_withinmr100)     1 2.0901e+11 2.9643e+14 723231
## + as.factor(char_oheat)          1 1.8743e+11 2.9645e+14 723233
## + as.factor(char_repair_cnd)     2 1.6846e+11 2.9647e+14 723237
## + char_hd_sf                     1 1.4795e+11 2.9649e+14 723237
## + as.factor(geo_floodplain)      1 1.4143e+11 2.9650e+14 723238
## + as.factor(char_roof_cnst)      5 1.5855e+11 2.9648e+14 723244
## + char_rooms                     1 7.9883e+10 2.9656e+14 723245
## + as.factor(char_gar1_att)       1 4.3301e+10 2.9660e+14 723249
## + as.factor(char_heat)           3 7.9735e+10 2.9656e+14 723249
## + as.factor(geo_withinmr101300)  1 3.8875e+10 2.9660e+14 723249
## + as.factor(ind_garage)          1 2.7591e+10 2.9661e+14 723250
## <none>                                        2.9664e+14 723251
## + geo_fs_flood_risk_direction    1 7.5306e+09 2.9663e+14 723252
## + as.factor(char_tp_plan)        1 1.2165e+09 2.9664e+14 723253
## + as.factor(char_gar1_area)      1 1.1172e+09 2.9664e+14 723253
## + as.factor(char_attic_type)     2 1.3239e+10 2.9663e+14 723254
## 
## Step:  AIC=723230.9
## sale_price ~ econ_midincome + char_bldg_sf + econ_tax_rate + 
##     as.factor(char_air) + as.factor(char_type_resd) + as.factor(char_bsmt) + 
##     as.factor(char_gar1_size) + as.factor(geo_ohare_noise) + 
##     as.factor(char_bsmt_fin) + geo_fs_flood_factor + char_frpl + 
##     char_age + char_hbath + char_fbath + char_beds + as.factor(char_ext_wall) + 
##     as.factor(geo_withinmr100)
## 
##                                 Df  Sum of Sq        RSS    AIC
## + as.factor(char_oheat)          1 1.8821e+11 2.9624e+14 723213
## + as.factor(char_repair_cnd)     2 1.6847e+11 2.9626e+14 723217
## + char_hd_sf                     1 1.4333e+11 2.9629e+14 723218
## + as.factor(geo_floodplain)      1 1.4114e+11 2.9629e+14 723218
## + as.factor(char_roof_cnst)      5 1.6296e+11 2.9627e+14 723224
## + char_rooms                     1 8.2903e+10 2.9635e+14 723224
## + as.factor(char_gar1_att)       1 4.3823e+10 2.9639e+14 723228
## + as.factor(geo_withinmr101300)  1 4.3793e+10 2.9639e+14 723228
## + as.factor(char_heat)           3 7.8139e+10 2.9635e+14 723229
## + as.factor(ind_garage)          1 2.7463e+10 2.9640e+14 723230
## <none>                                        2.9643e+14 723231
## + geo_fs_flood_risk_direction    1 6.1131e+09 2.9643e+14 723232
## + as.factor(char_tp_plan)        1 1.4583e+09 2.9643e+14 723233
## + as.factor(char_gar1_area)      1 1.3006e+09 2.9643e+14 723233
## + as.factor(char_attic_type)     2 1.2208e+10 2.9642e+14 723234
## 
## Step:  AIC=723212.9
## sale_price ~ econ_midincome + char_bldg_sf + econ_tax_rate + 
##     as.factor(char_air) + as.factor(char_type_resd) + as.factor(char_bsmt) + 
##     as.factor(char_gar1_size) + as.factor(geo_ohare_noise) + 
##     as.factor(char_bsmt_fin) + geo_fs_flood_factor + char_frpl + 
##     char_age + char_hbath + char_fbath + char_beds + as.factor(char_ext_wall) + 
##     as.factor(geo_withinmr100) + as.factor(char_oheat)
## 
##                                 Df  Sum of Sq        RSS    AIC
## + as.factor(char_repair_cnd)     2 1.6772e+11 2.9608e+14 723199
## + as.factor(geo_floodplain)      1 1.4153e+11 2.9610e+14 723200
## + char_hd_sf                     1 1.4098e+11 2.9610e+14 723200
## + as.factor(char_roof_cnst)      5 1.5805e+11 2.9609e+14 723206
## + char_rooms                     1 7.2900e+10 2.9617e+14 723207
## + as.factor(char_gar1_att)       1 4.4213e+10 2.9620e+14 723210
## + as.factor(geo_withinmr101300)  1 4.3867e+10 2.9620e+14 723210
## + as.factor(ind_garage)          1 2.7204e+10 2.9622e+14 723212
## + as.factor(char_heat)           3 6.3509e+10 2.9618e+14 723212
## <none>                                        2.9624e+14 723213
## + geo_fs_flood_risk_direction    1 5.9479e+09 2.9624e+14 723214
## + as.factor(char_tp_plan)        1 1.6395e+09 2.9624e+14 723215
## + as.factor(char_gar1_area)      1 1.1902e+09 2.9624e+14 723215
## + as.factor(char_attic_type)     2 1.1285e+10 2.9623e+14 723216
## 
## Step:  AIC=723199.1
## sale_price ~ econ_midincome + char_bldg_sf + econ_tax_rate + 
##     as.factor(char_air) + as.factor(char_type_resd) + as.factor(char_bsmt) + 
##     as.factor(char_gar1_size) + as.factor(geo_ohare_noise) + 
##     as.factor(char_bsmt_fin) + geo_fs_flood_factor + char_frpl + 
##     char_age + char_hbath + char_fbath + char_beds + as.factor(char_ext_wall) + 
##     as.factor(geo_withinmr100) + as.factor(char_oheat) + as.factor(char_repair_cnd)
## 
##                                 Df  Sum of Sq        RSS    AIC
## + as.factor(geo_floodplain)      1 1.4108e+11 2.9594e+14 723186
## + char_hd_sf                     1 1.3658e+11 2.9594e+14 723187
## + as.factor(char_roof_cnst)      5 1.5229e+11 2.9592e+14 723193
## + char_rooms                     1 7.4218e+10 2.9600e+14 723193
## + as.factor(char_gar1_att)       1 4.6802e+10 2.9603e+14 723196
## + as.factor(geo_withinmr101300)  1 4.4846e+10 2.9603e+14 723196
## + as.factor(ind_garage)          1 2.7214e+10 2.9605e+14 723198
## + as.factor(char_heat)           3 6.3592e+10 2.9601e+14 723198
## <none>                                        2.9608e+14 723199
## + geo_fs_flood_risk_direction    1 6.5294e+09 2.9607e+14 723200
## + as.factor(char_tp_plan)        1 5.0231e+09 2.9607e+14 723201
## + as.factor(char_gar1_area)      1 1.5289e+09 2.9607e+14 723201
## + as.factor(char_attic_type)     2 1.1442e+10 2.9606e+14 723202
## 
## Step:  AIC=723186.1
## sale_price ~ econ_midincome + char_bldg_sf + econ_tax_rate + 
##     as.factor(char_air) + as.factor(char_type_resd) + as.factor(char_bsmt) + 
##     as.factor(char_gar1_size) + as.factor(geo_ohare_noise) + 
##     as.factor(char_bsmt_fin) + geo_fs_flood_factor + char_frpl + 
##     char_age + char_hbath + char_fbath + char_beds + as.factor(char_ext_wall) + 
##     as.factor(geo_withinmr100) + as.factor(char_oheat) + as.factor(char_repair_cnd) + 
##     as.factor(geo_floodplain)
## 
##                                 Df  Sum of Sq        RSS    AIC
## + char_hd_sf                     1 1.5502e+11 2.9578e+14 723172
## + as.factor(char_roof_cnst)      5 1.5160e+11 2.9578e+14 723180
## + char_rooms                     1 7.5409e+10 2.9586e+14 723180
## + as.factor(char_gar1_att)       1 4.7913e+10 2.9589e+14 723183
## + as.factor(geo_withinmr101300)  1 4.4508e+10 2.9589e+14 723183
## + as.factor(ind_garage)          1 2.8529e+10 2.9591e+14 723185
## + as.factor(char_heat)           3 6.4673e+10 2.9587e+14 723185
## <none>                                        2.9594e+14 723186
## + geo_fs_flood_risk_direction    1 9.2951e+09 2.9593e+14 723187
## + as.factor(char_tp_plan)        1 5.5292e+09 2.9593e+14 723187
## + as.factor(char_gar1_area)      1 1.6604e+09 2.9593e+14 723188
## + as.factor(char_attic_type)     2 1.1395e+10 2.9592e+14 723189
## 
## Step:  AIC=723171.6
## sale_price ~ econ_midincome + char_bldg_sf + econ_tax_rate + 
##     as.factor(char_air) + as.factor(char_type_resd) + as.factor(char_bsmt) + 
##     as.factor(char_gar1_size) + as.factor(geo_ohare_noise) + 
##     as.factor(char_bsmt_fin) + geo_fs_flood_factor + char_frpl + 
##     char_age + char_hbath + char_fbath + char_beds + as.factor(char_ext_wall) + 
##     as.factor(geo_withinmr100) + as.factor(char_oheat) + as.factor(char_repair_cnd) + 
##     as.factor(geo_floodplain) + char_hd_sf
## 
##                                 Df  Sum of Sq        RSS    AIC
## + char_rooms                     1 7.2016e+10 2.9571e+14 723166
## + as.factor(char_roof_cnst)      5 1.4659e+11 2.9563e+14 723166
## + as.factor(char_gar1_att)       1 6.3316e+10 2.9572e+14 723167
## + as.factor(geo_withinmr101300)  1 4.2951e+10 2.9574e+14 723169
## + as.factor(char_heat)           3 6.7536e+10 2.9571e+14 723170
## + as.factor(ind_garage)          1 2.1749e+10 2.9576e+14 723171
## <none>                                        2.9578e+14 723172
## + geo_fs_flood_risk_direction    1 9.5204e+09 2.9577e+14 723173
## + as.factor(char_tp_plan)        1 6.1293e+09 2.9577e+14 723173
## + as.factor(char_gar1_area)      1 8.2713e+08 2.9578e+14 723173
## + as.factor(char_attic_type)     2 9.7497e+09 2.9577e+14 723175
## 
## Step:  AIC=723165.9
## sale_price ~ econ_midincome + char_bldg_sf + econ_tax_rate + 
##     as.factor(char_air) + as.factor(char_type_resd) + as.factor(char_bsmt) + 
##     as.factor(char_gar1_size) + as.factor(geo_ohare_noise) + 
##     as.factor(char_bsmt_fin) + geo_fs_flood_factor + char_frpl + 
##     char_age + char_hbath + char_fbath + char_beds + as.factor(char_ext_wall) + 
##     as.factor(geo_withinmr100) + as.factor(char_oheat) + as.factor(char_repair_cnd) + 
##     as.factor(geo_floodplain) + char_hd_sf + char_rooms
## 
##                                 Df  Sum of Sq        RSS    AIC
## + as.factor(char_roof_cnst)      5 1.4913e+11 2.9556e+14 723160
## + as.factor(char_gar1_att)       1 5.7902e+10 2.9565e+14 723162
## + as.factor(geo_withinmr101300)  1 4.3020e+10 2.9567e+14 723163
## + as.factor(char_heat)           3 7.2255e+10 2.9564e+14 723164
## + as.factor(ind_garage)          1 1.9764e+10 2.9569e+14 723166
## <none>                                        2.9571e+14 723166
## + geo_fs_flood_risk_direction    1 8.9735e+09 2.9570e+14 723167
## + as.factor(char_tp_plan)        1 6.3054e+09 2.9570e+14 723167
## + as.factor(char_gar1_area)      1 9.0503e+08 2.9571e+14 723168
## + as.factor(char_attic_type)     2 1.0195e+10 2.9570e+14 723169
## 
## Step:  AIC=723160
## sale_price ~ econ_midincome + char_bldg_sf + econ_tax_rate + 
##     as.factor(char_air) + as.factor(char_type_resd) + as.factor(char_bsmt) + 
##     as.factor(char_gar1_size) + as.factor(geo_ohare_noise) + 
##     as.factor(char_bsmt_fin) + geo_fs_flood_factor + char_frpl + 
##     char_age + char_hbath + char_fbath + char_beds + as.factor(char_ext_wall) + 
##     as.factor(geo_withinmr100) + as.factor(char_oheat) + as.factor(char_repair_cnd) + 
##     as.factor(geo_floodplain) + char_hd_sf + char_rooms + as.factor(char_roof_cnst)
## 
##                                 Df  Sum of Sq        RSS    AIC
## + as.factor(char_gar1_att)       1 5.9441e+10 2.9550e+14 723156
## + as.factor(geo_withinmr101300)  1 4.3789e+10 2.9552e+14 723157
## + as.factor(char_heat)           3 7.2505e+10 2.9549e+14 723158
## + as.factor(ind_garage)          1 1.9910e+10 2.9554e+14 723160
## <none>                                        2.9556e+14 723160
## + as.factor(char_tp_plan)        1 8.6379e+09 2.9555e+14 723161
## + geo_fs_flood_risk_direction    1 8.5332e+09 2.9555e+14 723161
## + as.factor(char_gar1_area)      1 9.7859e+08 2.9556e+14 723162
## + as.factor(char_attic_type)     2 1.1372e+10 2.9555e+14 723163
## 
## Step:  AIC=723155.7
## sale_price ~ econ_midincome + char_bldg_sf + econ_tax_rate + 
##     as.factor(char_air) + as.factor(char_type_resd) + as.factor(char_bsmt) + 
##     as.factor(char_gar1_size) + as.factor(geo_ohare_noise) + 
##     as.factor(char_bsmt_fin) + geo_fs_flood_factor + char_frpl + 
##     char_age + char_hbath + char_fbath + char_beds + as.factor(char_ext_wall) + 
##     as.factor(geo_withinmr100) + as.factor(char_oheat) + as.factor(char_repair_cnd) + 
##     as.factor(geo_floodplain) + char_hd_sf + char_rooms + as.factor(char_roof_cnst) + 
##     as.factor(char_gar1_att)
## 
##                                 Df  Sum of Sq        RSS    AIC
## + as.factor(geo_withinmr101300)  1 4.4424e+10 2.9546e+14 723153
## + as.factor(char_heat)           3 7.3078e+10 2.9543e+14 723154
## + as.factor(ind_garage)          1 2.0329e+10 2.9548e+14 723156
## <none>                                        2.9550e+14 723156
## + geo_fs_flood_risk_direction    1 8.7785e+09 2.9549e+14 723157
## + as.factor(char_tp_plan)        1 8.3220e+09 2.9549e+14 723157
## + as.factor(char_gar1_area)      1 1.0115e+09 2.9550e+14 723158
## + as.factor(char_attic_type)     2 1.0771e+10 2.9549e+14 723159
## 
## Step:  AIC=723153
## sale_price ~ econ_midincome + char_bldg_sf + econ_tax_rate + 
##     as.factor(char_air) + as.factor(char_type_resd) + as.factor(char_bsmt) + 
##     as.factor(char_gar1_size) + as.factor(geo_ohare_noise) + 
##     as.factor(char_bsmt_fin) + geo_fs_flood_factor + char_frpl + 
##     char_age + char_hbath + char_fbath + char_beds + as.factor(char_ext_wall) + 
##     as.factor(geo_withinmr100) + as.factor(char_oheat) + as.factor(char_repair_cnd) + 
##     as.factor(geo_floodplain) + char_hd_sf + char_rooms + as.factor(char_roof_cnst) + 
##     as.factor(char_gar1_att) + as.factor(geo_withinmr101300)
## 
##                               Df  Sum of Sq        RSS    AIC
## + as.factor(char_heat)         3 7.2523e+10 2.9538e+14 723151
## + as.factor(ind_garage)        1 2.0841e+10 2.9543e+14 723153
## <none>                                      2.9546e+14 723153
## + geo_fs_flood_risk_direction  1 8.2149e+09 2.9545e+14 723154
## + as.factor(char_tp_plan)      1 8.1798e+09 2.9545e+14 723154
## + as.factor(char_gar1_area)    1 1.0660e+09 2.9545e+14 723155
## + as.factor(char_attic_type)   2 1.0970e+10 2.9544e+14 723156
## 
## Step:  AIC=723151.2
## sale_price ~ econ_midincome + char_bldg_sf + econ_tax_rate + 
##     as.factor(char_air) + as.factor(char_type_resd) + as.factor(char_bsmt) + 
##     as.factor(char_gar1_size) + as.factor(geo_ohare_noise) + 
##     as.factor(char_bsmt_fin) + geo_fs_flood_factor + char_frpl + 
##     char_age + char_hbath + char_fbath + char_beds + as.factor(char_ext_wall) + 
##     as.factor(geo_withinmr100) + as.factor(char_oheat) + as.factor(char_repair_cnd) + 
##     as.factor(geo_floodplain) + char_hd_sf + char_rooms + as.factor(char_roof_cnst) + 
##     as.factor(char_gar1_att) + as.factor(geo_withinmr101300) + 
##     as.factor(char_heat)
## 
##                               Df  Sum of Sq        RSS    AIC
## + as.factor(ind_garage)        1 2.1116e+10 2.9536e+14 723151
## <none>                                      2.9538e+14 723151
## + as.factor(char_tp_plan)      1 8.6039e+09 2.9537e+14 723152
## + geo_fs_flood_risk_direction  1 8.4412e+09 2.9537e+14 723152
## + as.factor(char_gar1_area)    1 7.6773e+08 2.9538e+14 723153
## + as.factor(char_attic_type)   2 1.0961e+10 2.9537e+14 723154
## 
## Step:  AIC=723151
## sale_price ~ econ_midincome + char_bldg_sf + econ_tax_rate + 
##     as.factor(char_air) + as.factor(char_type_resd) + as.factor(char_bsmt) + 
##     as.factor(char_gar1_size) + as.factor(geo_ohare_noise) + 
##     as.factor(char_bsmt_fin) + geo_fs_flood_factor + char_frpl + 
##     char_age + char_hbath + char_fbath + char_beds + as.factor(char_ext_wall) + 
##     as.factor(geo_withinmr100) + as.factor(char_oheat) + as.factor(char_repair_cnd) + 
##     as.factor(geo_floodplain) + char_hd_sf + char_rooms + as.factor(char_roof_cnst) + 
##     as.factor(char_gar1_att) + as.factor(geo_withinmr101300) + 
##     as.factor(char_heat) + as.factor(ind_garage)
## 
##                               Df  Sum of Sq        RSS    AIC
## <none>                                      2.9536e+14 723151
## + geo_fs_flood_risk_direction  1 8.5507e+09 2.9535e+14 723152
## + as.factor(char_tp_plan)      1 8.5155e+09 2.9535e+14 723152
## + as.factor(char_gar1_area)    1 7.8388e+08 2.9536e+14 723153
## + as.factor(char_attic_type)   2 1.0921e+10 2.9535e+14 723154
# Coefficient table and fit statistics for the forward-stepwise model.
summary(object = lm.step.forward)
## 
## Call:
## lm(formula = sale_price ~ econ_midincome + char_bldg_sf + econ_tax_rate + 
##     as.factor(char_air) + as.factor(char_type_resd) + as.factor(char_bsmt) + 
##     as.factor(char_gar1_size) + as.factor(geo_ohare_noise) + 
##     as.factor(char_bsmt_fin) + geo_fs_flood_factor + char_frpl + 
##     char_age + char_hbath + char_fbath + char_beds + as.factor(char_ext_wall) + 
##     as.factor(geo_withinmr100) + as.factor(char_oheat) + as.factor(char_repair_cnd) + 
##     as.factor(geo_floodplain) + char_hd_sf + char_rooms + as.factor(char_roof_cnst) + 
##     as.factor(char_gar1_att) + as.factor(geo_withinmr101300) + 
##     as.factor(char_heat) + as.factor(ind_garage), data = df_trn_y)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -438080  -63607   -5888   53186  508253 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     5.932e+04  2.451e+04   2.420 0.015508 *  
## econ_midincome                  2.419e+00  2.063e-02 117.244  < 2e-16 ***
## char_bldg_sf                    7.239e+01  1.858e+00  38.968  < 2e-16 ***
## econ_tax_rate                  -6.870e+03  1.250e+02 -54.952  < 2e-16 ***
## as.factor(char_air)2           -2.136e+04  1.366e+03 -15.630  < 2e-16 ***
## as.factor(char_type_resd)2     -4.154e+03  1.707e+03  -2.433 0.014991 *  
## as.factor(char_type_resd)3      8.120e+04  4.951e+03  16.402  < 2e-16 ***
## as.factor(char_type_resd)4      6.562e+03  2.638e+03   2.488 0.012848 *  
## as.factor(char_type_resd)5     -1.118e+03  2.009e+03  -0.556 0.577985    
## as.factor(char_type_resd)6      3.966e+04  3.248e+04   1.221 0.221957    
## as.factor(char_type_resd)7      6.939e+04  2.694e+04   2.576 0.010008 *  
## as.factor(char_type_resd)8      4.342e+04  3.668e+04   1.184 0.236531    
## as.factor(char_type_resd)9      4.871e+04  9.697e+04   0.502 0.615439    
## as.factor(char_bsmt)2          -1.699e+04  1.907e+03  -8.912  < 2e-16 ***
## as.factor(char_bsmt)3          -1.178e+04  1.795e+03  -6.566 5.26e-11 ***
## as.factor(char_bsmt)4          -2.406e+04  2.464e+03  -9.762  < 2e-16 ***
## as.factor(char_gar1_size)2      2.176e+03  2.598e+03   0.838 0.402293    
## as.factor(char_gar1_size)3      9.949e+03  1.638e+03   6.073 1.27e-09 ***
## as.factor(char_gar1_size)4      3.349e+03  3.122e+03   1.073 0.283400    
## as.factor(char_gar1_size)5      2.327e+04  4.487e+03   5.186 2.16e-07 ***
## as.factor(char_gar1_size)6      4.043e+04  1.629e+04   2.482 0.013084 *  
## as.factor(char_gar1_size)7     -4.906e+04  2.321e+04  -2.114 0.034528 *  
## as.factor(char_gar1_size)8     -6.409e+03  1.726e+04  -0.371 0.710373    
## as.factor(geo_ohare_noise)1     5.115e+04  4.561e+03  11.215  < 2e-16 ***
## as.factor(char_bsmt_fin)2      -1.974e+04  5.267e+03  -3.748 0.000179 ***
## as.factor(char_bsmt_fin)3      -1.209e+04  1.485e+03  -8.142 4.04e-16 ***
## geo_fs_flood_factor             3.417e+03  3.411e+02  10.017  < 2e-16 ***
## char_frpl                       9.993e+03  1.319e+03   7.574 3.73e-14 ***
## char_age                        2.286e+02  2.774e+01   8.241  < 2e-16 ***
## char_hbath                      1.208e+04  1.235e+03   9.778  < 2e-16 ***
## char_fbath                      1.262e+04  1.319e+03   9.569  < 2e-16 ***
## char_beds                      -4.465e+03  1.049e+03  -4.256 2.08e-05 ***
## as.factor(char_ext_wall)2       2.868e+03  1.441e+03   1.990 0.046562 *  
## as.factor(char_ext_wall)3       2.753e+03  1.585e+03   1.737 0.082440 .  
## as.factor(char_ext_wall)4       2.940e+04  4.708e+03   6.244 4.32e-10 ***
## as.factor(geo_withinmr100)1    -3.201e+04  6.680e+03  -4.792 1.66e-06 ***
## as.factor(char_oheat)5          1.216e+04  2.989e+03   4.069 4.74e-05 ***
## as.factor(char_repair_cnd)2    -2.660e+04  6.633e+03  -4.010 6.10e-05 ***
## as.factor(char_repair_cnd)3    -1.788e+04  1.034e+04  -1.730 0.083684 .  
## as.factor(geo_floodplain)1     -2.188e+04  5.246e+03  -4.171 3.04e-05 ***
## char_hd_sf                      2.668e-01  6.534e-02   4.084 4.44e-05 ***
## char_rooms                     -1.558e+03  5.626e+02  -2.769 0.005629 ** 
## as.factor(char_roof_cnst)2     -7.096e+02  2.604e+03  -0.272 0.785242    
## as.factor(char_roof_cnst)3     -1.203e+04  1.207e+04  -0.997 0.318993    
## as.factor(char_roof_cnst)4      1.483e+04  8.838e+03   1.678 0.093283 .  
## as.factor(char_roof_cnst)5     -9.702e+03  8.027e+03  -1.209 0.226819    
## as.factor(char_roof_cnst)6      2.999e+04  9.174e+03   3.270 0.001078 ** 
## as.factor(char_gar1_att)2       4.385e+03  1.720e+03   2.550 0.010788 *  
## as.factor(geo_withinmr101300)1 -5.003e+03  2.302e+03  -2.173 0.029756 *  
## as.factor(char_heat)2           2.636e+03  1.776e+03   1.485 0.137679    
## as.factor(char_heat)3          -1.666e+04  8.508e+03  -1.958 0.050245 .  
## as.factor(char_heat)4          -1.412e+04  1.212e+04  -1.165 0.243969    
## as.factor(ind_garage)1         -3.463e+04  2.310e+04  -1.499 0.133846    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 96930 on 31436 degrees of freedom
## Multiple R-squared:  0.5768, Adjusted R-squared:  0.5761 
## F-statistic:   824 on 52 and 31436 DF,  p-value: < 2.2e-16
# Score the test split (df_tst_y) with the forward-stepwise model.
lm.step.pred.forward <- predict(lm.step.forward, newdata = df_tst_y)
# Show the first six predictions.
head(lm.step.pred.forward, n = 6L)
##         3         9        15        20        22        23 
##  57299.74 191440.69 195804.08  91136.73 207192.23 255702.49
# Test-set mean squared error: mean of squared (observed - predicted).
mean((df_tst_y[["sale_price"]] - lm.step.pred.forward)^2)
## [1] 9140828696

Stepwise Regression (Both Directions)

# Stepwise search starting from lm_full_y; direction = "both" lets each step
# either drop a term or re-add a previously dropped one (AIC trace follows).
lm.step.both <- step(object = lm_full_y, direction = "both")
## Start:  AIC=723157.9
## sale_price ~ char_rooms + char_frpl + char_bldg_sf + econ_tax_rate + 
##     char_age + char_fbath + econ_midincome + as.factor(geo_floodplain) + 
##     as.factor(geo_withinmr100) + as.factor(geo_ohare_noise) + 
##     as.factor(char_heat) + as.factor(char_gar1_att) + as.factor(char_bsmt) + 
##     as.factor(char_attic_type) + as.factor(char_tp_plan) + char_beds + 
##     char_hbath + char_hd_sf + geo_fs_flood_factor + geo_fs_flood_risk_direction + 
##     as.factor(geo_withinmr101300) + as.factor(ind_garage) + as.factor(char_ext_wall) + 
##     as.factor(char_type_resd) + as.factor(char_roof_cnst) + as.factor(char_oheat) + 
##     as.factor(char_gar1_size) + as.factor(char_gar1_area) + as.factor(char_repair_cnd) + 
##     as.factor(char_bsmt_fin) + as.factor(char_air)
## 
##                                 Df  Sum of Sq        RSS    AIC
## - as.factor(char_attic_type)     2 1.1096e+10 2.9534e+14 723155
## - as.factor(char_gar1_area)      1 9.2341e+08 2.9533e+14 723156
## - geo_fs_flood_risk_direction    1 8.7545e+09 2.9534e+14 723157
## - as.factor(char_tp_plan)        1 8.7990e+09 2.9534e+14 723157
## <none>                                        2.9533e+14 723158
## - as.factor(ind_garage)          1 2.1105e+10 2.9535e+14 723158
## - as.factor(char_heat)           3 7.3081e+10 2.9541e+14 723160
## - as.factor(geo_withinmr101300)  1 4.3979e+10 2.9538e+14 723161
## - as.factor(char_gar1_att)       1 6.0268e+10 2.9539e+14 723162
## - char_rooms                     1 7.1911e+10 2.9540e+14 723164
## - as.factor(char_roof_cnst)      5 1.5507e+11 2.9549e+14 723164
## - as.factor(char_repair_cnd)     2 1.6704e+11 2.9550e+14 723172
## - as.factor(char_oheat)          1 1.5429e+11 2.9549e+14 723172
## - char_hd_sf                     1 1.5629e+11 2.9549e+14 723173
## - char_beds                      1 1.6655e+11 2.9550e+14 723174
## - as.factor(geo_floodplain)      1 1.6692e+11 2.9550e+14 723174
## - as.factor(geo_withinmr100)     1 2.1360e+11 2.9555e+14 723179
## - as.factor(char_ext_wall)       3 3.7769e+11 2.9571e+14 723192
## - as.factor(char_gar1_size)      7 6.3707e+11 2.9597e+14 723212
## - char_frpl                      1 5.3576e+11 2.9587e+14 723213
## - char_age                       1 6.0852e+11 2.9594e+14 723221
## - geo_fs_flood_factor            1 6.2937e+11 2.9596e+14 723223
## - as.factor(char_bsmt_fin)       2 6.6498e+11 2.9600e+14 723225
## - char_fbath                     1 8.6757e+11 2.9620e+14 723248
## - char_hbath                     1 8.9660e+11 2.9623e+14 723251
## - as.factor(geo_ohare_noise)     1 1.1778e+12 2.9651e+14 723281
## - as.factor(char_bsmt)           3 1.4972e+12 2.9683e+14 723311
## - as.factor(char_air)            1 2.2923e+12 2.9762e+14 723399
## - as.factor(char_type_resd)      8 3.1398e+12 2.9847e+14 723475
## - char_bldg_sf                   1 1.4192e+13 3.0952e+14 724634
## - econ_tax_rate                  1 2.8326e+13 3.2366e+14 726040
## - econ_midincome                 1 1.2855e+14 4.2388e+14 734534
## 
## Step:  AIC=723155.1
## sale_price ~ char_rooms + char_frpl + char_bldg_sf + econ_tax_rate + 
##     char_age + char_fbath + econ_midincome + as.factor(geo_floodplain) + 
##     as.factor(geo_withinmr100) + as.factor(geo_ohare_noise) + 
##     as.factor(char_heat) + as.factor(char_gar1_att) + as.factor(char_bsmt) + 
##     as.factor(char_tp_plan) + char_beds + char_hbath + char_hd_sf + 
##     geo_fs_flood_factor + geo_fs_flood_risk_direction + as.factor(geo_withinmr101300) + 
##     as.factor(ind_garage) + as.factor(char_ext_wall) + as.factor(char_type_resd) + 
##     as.factor(char_roof_cnst) + as.factor(char_oheat) + as.factor(char_gar1_size) + 
##     as.factor(char_gar1_area) + as.factor(char_repair_cnd) + 
##     as.factor(char_bsmt_fin) + as.factor(char_air)
## 
##                                 Df  Sum of Sq        RSS    AIC
## - as.factor(char_gar1_area)      1 8.8360e+08 2.9534e+14 723153
## - geo_fs_flood_risk_direction    1 8.6854e+09 2.9535e+14 723154
## - as.factor(char_tp_plan)        1 8.7293e+09 2.9535e+14 723154
## <none>                                        2.9534e+14 723155
## - as.factor(ind_garage)          1 2.1154e+10 2.9536e+14 723155
## - as.factor(char_heat)           3 7.3142e+10 2.9542e+14 723157
## - as.factor(geo_withinmr101300)  1 4.3714e+10 2.9539e+14 723158
## + as.factor(char_attic_type)     2 1.1096e+10 2.9533e+14 723158
## - as.factor(char_gar1_att)       1 6.0715e+10 2.9540e+14 723160
## - char_rooms                     1 7.1397e+10 2.9541e+14 723161
## - as.factor(char_roof_cnst)      5 1.5375e+11 2.9550e+14 723161
## - as.factor(char_repair_cnd)     2 1.6679e+11 2.9551e+14 723169
## - as.factor(char_oheat)          1 1.5583e+11 2.9550e+14 723170
## - char_hd_sf                     1 1.5827e+11 2.9550e+14 723170
## - char_beds                      1 1.6630e+11 2.9551e+14 723171
## - as.factor(geo_floodplain)      1 1.6710e+11 2.9551e+14 723171
## - as.factor(geo_withinmr100)     1 2.1446e+11 2.9556e+14 723176
## - as.factor(char_ext_wall)       3 3.8085e+11 2.9572e+14 723190
## - as.factor(char_gar1_size)      7 6.3466e+11 2.9598e+14 723209
## - char_frpl                      1 5.3916e+11 2.9588e+14 723210
## - geo_fs_flood_factor            1 6.2751e+11 2.9597e+14 723220
## - char_age                       1 6.3696e+11 2.9598e+14 723221
## - as.factor(char_bsmt_fin)       2 6.6923e+11 2.9601e+14 723222
## - char_fbath                     1 8.6254e+11 2.9621e+14 723245
## - char_hbath                     1 8.9736e+11 2.9624e+14 723249
## - as.factor(geo_ohare_noise)     1 1.1808e+12 2.9652e+14 723279
## - as.factor(char_bsmt)           3 1.4957e+12 2.9684e+14 723308
## - as.factor(char_air)            1 2.2964e+12 2.9764e+14 723397
## - as.factor(char_type_resd)      8 3.1538e+12 2.9850e+14 723474
## - char_bldg_sf                   1 1.4197e+13 3.0954e+14 724632
## - econ_tax_rate                  1 2.8369e+13 3.2371e+14 726041
## - econ_midincome                 1 1.2899e+14 4.2433e+14 734564
## 
## Step:  AIC=723153.2
## sale_price ~ char_rooms + char_frpl + char_bldg_sf + econ_tax_rate + 
##     char_age + char_fbath + econ_midincome + as.factor(geo_floodplain) + 
##     as.factor(geo_withinmr100) + as.factor(geo_ohare_noise) + 
##     as.factor(char_heat) + as.factor(char_gar1_att) + as.factor(char_bsmt) + 
##     as.factor(char_tp_plan) + char_beds + char_hbath + char_hd_sf + 
##     geo_fs_flood_factor + geo_fs_flood_risk_direction + as.factor(geo_withinmr101300) + 
##     as.factor(ind_garage) + as.factor(char_ext_wall) + as.factor(char_type_resd) + 
##     as.factor(char_roof_cnst) + as.factor(char_oheat) + as.factor(char_gar1_size) + 
##     as.factor(char_repair_cnd) + as.factor(char_bsmt_fin) + as.factor(char_air)
## 
##                                 Df  Sum of Sq        RSS    AIC
## - as.factor(char_tp_plan)        1 8.6392e+09 2.9535e+14 723152
## - geo_fs_flood_risk_direction    1 8.6744e+09 2.9535e+14 723152
## <none>                                        2.9534e+14 723153
## - as.factor(ind_garage)          1 2.1137e+10 2.9537e+14 723153
## - as.factor(char_heat)           3 7.3461e+10 2.9542e+14 723155
## + as.factor(char_gar1_area)      1 8.8360e+08 2.9534e+14 723155
## - as.factor(geo_withinmr101300)  1 4.3665e+10 2.9539e+14 723156
## + as.factor(char_attic_type)     2 1.1056e+10 2.9533e+14 723156
## - as.factor(char_gar1_att)       1 6.0992e+10 2.9541e+14 723158
## - char_rooms                     1 7.1681e+10 2.9542e+14 723159
## - as.factor(char_roof_cnst)      5 1.5376e+11 2.9550e+14 723160
## - as.factor(char_repair_cnd)     2 1.6699e+11 2.9551e+14 723167
## - as.factor(char_oheat)          1 1.5571e+11 2.9550e+14 723168
## - char_hd_sf                     1 1.5747e+11 2.9550e+14 723168
## - as.factor(geo_floodplain)      1 1.6708e+11 2.9551e+14 723169
## - char_beds                      1 1.7084e+11 2.9552e+14 723169
## - as.factor(geo_withinmr100)     1 2.1461e+11 2.9556e+14 723174
## - as.factor(char_ext_wall)       3 3.8074e+11 2.9573e+14 723188
## - as.factor(char_gar1_size)      7 6.3926e+11 2.9598e+14 723207
## - char_frpl                      1 5.3960e+11 2.9588e+14 723209
## - geo_fs_flood_factor            1 6.2772e+11 2.9597e+14 723218
## - char_age                       1 6.4341e+11 2.9599e+14 723220
## - as.factor(char_bsmt_fin)       2 6.6868e+11 2.9601e+14 723220
## - char_fbath                     1 8.6201e+11 2.9621e+14 723243
## - char_hbath                     1 8.9802e+11 2.9624e+14 723247
## - as.factor(geo_ohare_noise)     1 1.1802e+12 2.9652e+14 723277
## - as.factor(char_bsmt)           3 1.4981e+12 2.9684e+14 723306
## - as.factor(char_air)            1 2.2970e+12 2.9764e+14 723395
## - as.factor(char_type_resd)      8 3.2538e+12 2.9860e+14 723482
## - char_bldg_sf                   1 1.4272e+13 3.0962e+14 724637
## - econ_tax_rate                  1 2.8369e+13 3.2371e+14 726039
## - econ_midincome                 1 1.2911e+14 4.2445e+14 734571
## 
## Step:  AIC=723152.1
## sale_price ~ char_rooms + char_frpl + char_bldg_sf + econ_tax_rate + 
##     char_age + char_fbath + econ_midincome + as.factor(geo_floodplain) + 
##     as.factor(geo_withinmr100) + as.factor(geo_ohare_noise) + 
##     as.factor(char_heat) + as.factor(char_gar1_att) + as.factor(char_bsmt) + 
##     char_beds + char_hbath + char_hd_sf + geo_fs_flood_factor + 
##     geo_fs_flood_risk_direction + as.factor(geo_withinmr101300) + 
##     as.factor(ind_garage) + as.factor(char_ext_wall) + as.factor(char_type_resd) + 
##     as.factor(char_roof_cnst) + as.factor(char_oheat) + as.factor(char_gar1_size) + 
##     as.factor(char_repair_cnd) + as.factor(char_bsmt_fin) + as.factor(char_air)
## 
##                                 Df  Sum of Sq        RSS    AIC
## - geo_fs_flood_risk_direction    1 8.5507e+09 2.9536e+14 723151
## <none>                                        2.9535e+14 723152
## - as.factor(ind_garage)          1 2.1226e+10 2.9537e+14 723152
## + as.factor(char_tp_plan)        1 8.6392e+09 2.9534e+14 723153
## - as.factor(char_heat)           3 7.3030e+10 2.9543e+14 723154
## + as.factor(char_gar1_area)      1 7.9356e+08 2.9535e+14 723154
## - as.factor(geo_withinmr101300)  1 4.3814e+10 2.9540e+14 723155
## + as.factor(char_attic_type)     2 1.0990e+10 2.9534e+14 723155
## - as.factor(char_gar1_att)       1 6.1318e+10 2.9541e+14 723157
## - char_rooms                     1 7.1545e+10 2.9542e+14 723158
## - as.factor(char_roof_cnst)      5 1.5145e+11 2.9550e+14 723158
## - as.factor(char_repair_cnd)     2 1.6236e+11 2.9552e+14 723165
## - as.factor(char_oheat)          1 1.5532e+11 2.9551e+14 723167
## - char_hd_sf                     1 1.5698e+11 2.9551e+14 723167
## - as.factor(geo_floodplain)      1 1.6630e+11 2.9552e+14 723168
## - char_beds                      1 1.7002e+11 2.9552e+14 723168
## - as.factor(geo_withinmr100)     1 2.1401e+11 2.9557e+14 723173
## - as.factor(char_ext_wall)       3 3.7966e+11 2.9573e+14 723187
## - as.factor(char_gar1_size)      7 6.4187e+11 2.9599e+14 723206
## - char_frpl                      1 5.3832e+11 2.9589e+14 723207
## - geo_fs_flood_factor            1 6.2652e+11 2.9598e+14 723217
## - char_age                       1 6.3987e+11 2.9599e+14 723218
## - as.factor(char_bsmt_fin)       2 6.6791e+11 2.9602e+14 723219
## - char_fbath                     1 8.6016e+11 2.9621e+14 723242
## - char_hbath                     1 8.9864e+11 2.9625e+14 723246
## - as.factor(geo_ohare_noise)     1 1.1806e+12 2.9653e+14 723276
## - as.factor(char_bsmt)           3 1.4970e+12 2.9685e+14 723305
## - as.factor(char_air)            1 2.2926e+12 2.9765e+14 723394
## - as.factor(char_type_resd)      8 3.2543e+12 2.9861e+14 723481
## - char_bldg_sf                   1 1.4268e+13 3.0962e+14 724636
## - econ_tax_rate                  1 2.8380e+13 3.2373e+14 726039
## - econ_midincome                 1 1.2910e+14 4.2446e+14 734569
## 
## Step:  AIC=723151
## sale_price ~ char_rooms + char_frpl + char_bldg_sf + econ_tax_rate + 
##     char_age + char_fbath + econ_midincome + as.factor(geo_floodplain) + 
##     as.factor(geo_withinmr100) + as.factor(geo_ohare_noise) + 
##     as.factor(char_heat) + as.factor(char_gar1_att) + as.factor(char_bsmt) + 
##     char_beds + char_hbath + char_hd_sf + geo_fs_flood_factor + 
##     as.factor(geo_withinmr101300) + as.factor(ind_garage) + as.factor(char_ext_wall) + 
##     as.factor(char_type_resd) + as.factor(char_roof_cnst) + as.factor(char_oheat) + 
##     as.factor(char_gar1_size) + as.factor(char_repair_cnd) + 
##     as.factor(char_bsmt_fin) + as.factor(char_air)
## 
##                                 Df  Sum of Sq        RSS    AIC
## <none>                                        2.9536e+14 723151
## - as.factor(ind_garage)          1 2.1116e+10 2.9538e+14 723151
## + geo_fs_flood_risk_direction    1 8.5507e+09 2.9535e+14 723152
## + as.factor(char_tp_plan)        1 8.5155e+09 2.9535e+14 723152
## - as.factor(char_heat)           3 7.2798e+10 2.9543e+14 723153
## + as.factor(char_gar1_area)      1 7.8388e+08 2.9536e+14 723153
## - as.factor(geo_withinmr101300)  1 4.4383e+10 2.9541e+14 723154
## + as.factor(char_attic_type)     2 1.0921e+10 2.9535e+14 723154
## - as.factor(char_gar1_att)       1 6.1077e+10 2.9542e+14 723155
## - char_rooms                     1 7.2032e+10 2.9543e+14 723157
## - as.factor(char_roof_cnst)      5 1.5187e+11 2.9551e+14 723157
## - as.factor(char_repair_cnd)     2 1.6169e+11 2.9552e+14 723164
## - as.factor(char_oheat)          1 1.5554e+11 2.9552e+14 723166
## - char_hd_sf                     1 1.5671e+11 2.9552e+14 723166
## - as.factor(geo_floodplain)      1 1.6347e+11 2.9552e+14 723166
## - char_beds                      1 1.7023e+11 2.9553e+14 723167
## - as.factor(geo_withinmr100)     1 2.1572e+11 2.9558e+14 723172
## - as.factor(char_ext_wall)       3 3.7939e+11 2.9574e+14 723185
## - as.factor(char_gar1_size)      7 6.4216e+11 2.9600e+14 723205
## - char_frpl                      1 5.3893e+11 2.9590e+14 723206
## - char_age                       1 6.3805e+11 2.9600e+14 723217
## - as.factor(char_bsmt_fin)       2 6.6642e+11 2.9603e+14 723218
## - char_fbath                     1 8.6034e+11 2.9622e+14 723241
## - char_hbath                     1 8.9825e+11 2.9626e+14 723245
## - geo_fs_flood_factor            1 9.4283e+11 2.9630e+14 723249
## - as.factor(geo_ohare_noise)     1 1.1818e+12 2.9654e+14 723275
## - as.factor(char_bsmt)           3 1.4959e+12 2.9686e+14 723304
## - as.factor(char_air)            1 2.2954e+12 2.9766e+14 723393
## - as.factor(char_type_resd)      8 3.2641e+12 2.9863e+14 723481
## - char_bldg_sf                   1 1.4267e+13 3.0963e+14 724634
## - econ_tax_rate                  1 2.8373e+13 3.2373e+14 726037
## - econ_midincome                 1 1.2915e+14 4.2451e+14 734572
# Predict sale prices for the rows of df_tst_y with the model selected by the
# stepwise search (the trace above shows both additions and removals).
lm.step.pred.both <- predict(lm.step.both, newdata = df_tst_y)
head(lm.step.pred.both)
##         3         9        15        20        22        23 
##  57299.74 191440.69 195804.08  91136.73 207192.23 255702.49
# Mean squared error of the stepwise model on df_tst_y.
step_resid <- df_tst_y$sale_price - lm.step.pred.both
mse2 <- mean(step_resid^2)

Lasso Regression

# Lasso (alpha = 1) on every predictor in y, with a 70/30 random hold-out.
# The design matrix drops the intercept column because glmnet adds its own.
x <- model.matrix(sale_price ~ ., y)[, -1]
yy <- y$sale_price
# model.matrix() silently drops rows containing NA; if that happened, x and yy
# would no longer line up row-for-row, so fail loudly instead.
stopifnot(nrow(x) == length(yy))

# NOTE(review): this split is drawn independently of df_tst_y, which is used
# for mse2, so mse3 may not be measured on the same rows -- confirm before
# comparing the models.
n_obs <- nrow(x)
train.index <- sample(seq_len(n_obs), n_obs * 0.7)
test.index <- (-train.index)
yy.test <- yy[test.index]

fit <- glmnet(x[train.index, ], yy[train.index], alpha = 1)
dim(coef(fit))
## [1] 33 73
# Smallest lambda on the fitted path. The previous fixed index (74) ran past
# the 73 lambdas reported above and therefore produced NA.
lambda.small <- min(fit$lambda)

# Choose lambda by cross-validated MSE on the training rows only.
cv.fit <- cv.glmnet(x[train.index, ], yy[train.index], alpha = 1,
                    type.measure = "mse")
lambda.best <- cv.fit$lambda.min
# Take the single coefficient column as a named vector, without hard-coding
# the number of coefficients.
coef.lambda.best <- predict(cv.fit, s = lambda.best, type = "coefficients")[, 1]
pred.lambda.best <- predict(cv.fit, s = lambda.best, newx = x[test.index, ])
# Hold-out mean squared error of the lasso model.
mse3 <- mean((yy.test - pred.lambda.best)^2)
plot(cv.fit)

# Print the lasso coefficients at the lambda stored in cv.fit$lambda.min;
# entries shown as "." are exactly zero in the sparse matrix.
coef(cv.fit, s = cv.fit$lambda.min)
## 33 x 1 sparse Matrix of class "dgCMatrix"
##                                        s1
## (Intercept)                  1.963911e+04
## char_hd_sf                   2.440168e-01
## char_age                     1.725008e+02
## char_ext_wall                4.496636e+03
## char_roof_cnst               3.449236e+03
## char_rooms                  -1.515727e+03
## char_beds                   -3.631527e+03
## char_bsmt                   -7.315436e+03
## char_bsmt_fin               -6.715945e+03
## char_heat                    3.167610e+03
## char_oheat                   3.959111e+03
## char_air                    -2.202142e+04
## char_frpl                    1.013597e+04
## char_attic_type              .           
## char_fbath                   1.414368e+04
## char_hbath                   1.323162e+04
## char_tp_plan                 1.344806e+03
## char_gar1_size               3.206762e+03
## char_gar1_cnst               .           
## char_gar1_att                4.288820e+03
## char_gar1_area              -7.092916e+03
## char_bldg_sf                 6.765470e+01
## char_repair_cnd             -1.662043e+04
## char_type_resd               1.412648e+03
## geo_ohare_noise              5.459936e+04
## geo_floodplain              -2.417397e+04
## geo_fs_flood_factor          4.397047e+03
## geo_fs_flood_risk_direction -3.109374e+03
## geo_withinmr100             -2.252768e+04
## geo_withinmr101300          -3.003767e+03
## econ_tax_rate               -7.086786e+03
## econ_midincome               2.416726e+00
## ind_garage                   3.698402e+04

Result

# Put mse1, mse2 and mse3 side by side in one table, rounded to 2 decimals.
model_names <- c("Linear Regression", "Stepwise Regression", "Lasso Regression")
model_mse <- c(mse1, mse2, mse3)
kable(tibble(Model = model_names, MSE = model_mse), digits = 2)
Model MSE
Linear Regression 9138340041
Stepwise Regression 9140828696
Lasso Regression 9374324815

Prediction on the prediction dataset (predict_property_data.csv)

# Prepare the prediction set: drop the same columns that were removed from the
# training data, then fill missing values column by column.
val <- select(val, -c(meta_cdu,char_tp_dsgn,char_attic_fnsh,char_renovation,char_porch,char_apts,
              ind_arms_length, char_ot_impr, char_use,geo_property_zip,geo_black_perc,geo_other_perc,
              char_cnst_qlty,meta_town_code,meta_certified_est_land,geo_property_city,geo_white_perc,
              geo_his_perc,geo_municipality,ind_large_home,meta_class,meta_certified_est_bldg,geo_tract_pop,
              geo_asian_perc,geo_fips,char_site,meta_deed_type, meta_nbhd, geo_school_hs_district, geo_school_elem_district))
#colnames(val)

# Columns grouped by imputation rule. Two names were previously misspelled
# (char_attic_tvalpe, char_tvalpe_resd), which made those assignments silent
# no-ops and left char_attic_type / char_type_resd with their NAs.
impute_zero_cols <- c(
  "geo_floodplain", "geo_ohare_noise", "geo_withinmr100",
  "geo_withinmr101300", "ind_garage"
)
impute_mode_cols <- c(
  "char_ext_wall", "char_roof_cnst", "char_bsmt", "char_bsmt_fin",
  "char_heat", "char_oheat", "char_air", "char_attic_type", "char_tp_plan",
  "char_gar1_size", "char_gar1_cnst", "char_gar1_att", "char_gar1_area",
  "char_repair_cnd", "char_type_resd"
)
impute_median_cols <- c(
  "char_frpl", "econ_midincome", "geo_fs_flood_factor",
  "geo_fs_flood_risk_direction"
)

# `$<-` on a non-existent column with an empty replacement does nothing, so a
# misspelled name would otherwise go unnoticed. Check the names up front.
missing_cols <- setdiff(
  c(impute_zero_cols, impute_mode_cols, impute_median_cols),
  colnames(val)
)
if (length(missing_cols) > 0) {
  stop(
    "Columns to impute not found in val: ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# Replace missing values
# Indicator-style columns: a missing value is filled with 0.
for (col in impute_zero_cols) {
  val[[col]][is.na(val[[col]])] <- 0
}

# Coded columns: fill with the most frequent observed value. NAs are excluded
# before taking the mode so that NA itself can never be chosen as the fill.
for (col in impute_mode_cols) {
  observed <- val[[col]][!is.na(val[[col]])]
  val[[col]][is.na(val[[col]])] <- calc_mode(observed)
}

# Numeric columns: fill with the column median.
for (col in impute_median_cols) {
  val[[col]][is.na(val[[col]])] <- median(val[[col]], na.rm = TRUE)
}
#Prediction
#Prediction
# Score the prediction set with lm_full_y and keep the result in a one-column
# data frame named assessed_value.
pred <- predict.lm(lm_full_y, newdata = val)
pred_df <- data.frame(pred)
names(pred_df) <- "assessed_value"
# Negative predictions and predictions that came back NA are both set to 0.
needs_zero <- is.na(pred_df$assessed_value) | pred_df$assessed_value < 0
pred_df$assessed_value[needs_zero] <- 0
head(pred_df)
##   assessed_value
## 1       292398.8
## 2       274389.5
## 3       503971.4
## 4       267550.0
## 5       267332.5
## 6       109068.2
# Export with row names kept as the first column of the CSV.
write.csv(pred_df, file = "assessed_value.csv", row.names = TRUE)
summary(pred_df)
##  assessed_value   
##  Min.   :      0  
##  1st Qu.: 169387  
##  Median : 242378  
##  Mean   : 272946  
##  3rd Qu.: 347531  
##  Max.   :1345653